| //===- OpenMPToLLVMIRTranslation.cpp - Translate OpenMP dialect to LLVM IR-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a translation between the MLIR OpenMP dialect and LLVM |
| // IR. |
| // |
| //===----------------------------------------------------------------------===// |
| #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" |
| #include "mlir/Analysis/TopologicalSortUtils.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" |
| #include "mlir/IR/IRMapping.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Support/LLVM.h" |
| #include "mlir/Support/LogicalResult.h" |
| #include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" |
| #include "mlir/Target/LLVMIR/ModuleTranslation.h" |
| #include "mlir/Transforms/RegionUtils.h" |
| |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/TypeSwitch.h" |
| #include "llvm/Frontend/OpenMP/OMPConstants.h" |
| #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
| #include "llvm/IR/DebugInfoMetadata.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/TargetParser/Triple.h" |
| #include "llvm/Transforms/Utils/ModuleUtils.h" |
| |
| #include <any> |
| #include <cstdint> |
| #include <iterator> |
| #include <numeric> |
| #include <optional> |
| #include <utility> |
| |
| using namespace mlir; |
| |
| namespace { |
| static llvm::omp::ScheduleKind |
| convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) { |
| if (!schedKind.has_value()) |
| return llvm::omp::OMP_SCHEDULE_Default; |
| switch (schedKind.value()) { |
| case omp::ClauseScheduleKind::Static: |
| return llvm::omp::OMP_SCHEDULE_Static; |
| case omp::ClauseScheduleKind::Dynamic: |
| return llvm::omp::OMP_SCHEDULE_Dynamic; |
| case omp::ClauseScheduleKind::Guided: |
| return llvm::omp::OMP_SCHEDULE_Guided; |
| case omp::ClauseScheduleKind::Auto: |
| return llvm::omp::OMP_SCHEDULE_Auto; |
| case omp::ClauseScheduleKind::Runtime: |
| return llvm::omp::OMP_SCHEDULE_Runtime; |
| } |
| llvm_unreachable("unhandled schedule clause argument"); |
| } |
| |
| /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the |
| /// insertion points for allocas. |
| class OpenMPAllocaStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase<OpenMPAllocaStackFrame> { |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) |
| |
| explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) |
| : allocaInsertPoint(allocaIP) {} |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| }; |
| |
| /// ModuleTranslation stack frame containing the partial mapping between MLIR |
| /// values and their LLVM IR equivalents. |
| class OpenMPVarMappingStackFrame |
| : public LLVM::ModuleTranslation::StackFrameBase< |
| OpenMPVarMappingStackFrame> { |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPVarMappingStackFrame) |
| |
| explicit OpenMPVarMappingStackFrame( |
| const DenseMap<Value, llvm::Value *> &mapping) |
| : mapping(mapping) {} |
| |
| DenseMap<Value, llvm::Value *> mapping; |
| }; |
| } // namespace |
| |
| /// Find the insertion point for allocas given the current insertion point for |
| /// normal operations in the builder. |
| static llvm::OpenMPIRBuilder::InsertPointTy |
| findAllocaInsertPoint(llvm::IRBuilderBase &builder, |
| const LLVM::ModuleTranslation &moduleTranslation) { |
| // If there is an alloca insertion point on stack, i.e. we are in a nested |
| // operation and a specific point was provided by some surrounding operation, |
| // use it. |
| llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; |
| WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( |
| [&](const OpenMPAllocaStackFrame &frame) { |
| allocaInsertPoint = frame.allocaInsertPoint; |
| return WalkResult::interrupt(); |
| }); |
| if (walkResult.wasInterrupted()) |
| return allocaInsertPoint; |
| |
| // Otherwise, insert to the entry block of the surrounding function. |
| // If the current IRBuilder InsertPoint is the function's entry, it cannot |
| // also be used for alloca insertion which would result in insertion order |
| // confusion. Create a new BasicBlock for the Builder and use the entry block |
| // for the allocs. |
| // TODO: Create a dedicated alloca BasicBlock at function creation such that |
| // we do not need to move the current InertPoint here. |
| if (builder.GetInsertBlock() == |
| &builder.GetInsertBlock()->getParent()->getEntryBlock()) { |
| assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && |
| "Assuming end of basic block"); |
| llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( |
| builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| builder.CreateBr(entryBB); |
| builder.SetInsertPoint(entryBB); |
| } |
| |
| llvm::BasicBlock &funcEntryBlock = |
| builder.GetInsertBlock()->getParent()->getEntryBlock(); |
| return llvm::OpenMPIRBuilder::InsertPointTy( |
| &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); |
| } |
| |
| /// Converts the given region that appears within an OpenMP dialect operation to |
| /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the |
| /// region, and a branch from any block with an successor-less OpenMP terminator |
| /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes |
| /// of the continuation block if provided. |
| static llvm::BasicBlock *convertOmpOpRegions( |
| Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, |
| SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) { |
| llvm::BasicBlock *continuationBlock = |
| splitBB(builder, true, "omp.region.cont"); |
| llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); |
| |
| llvm::LLVMContext &llvmContext = builder.getContext(); |
| for (Block &bb : region) { |
| llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( |
| llvmContext, blockName, builder.GetInsertBlock()->getParent(), |
| builder.GetInsertBlock()->getNextNode()); |
| moduleTranslation.mapBlock(&bb, llvmBB); |
| } |
| |
| llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); |
| |
| // Terminators (namely YieldOp) may be forwarding values to the region that |
| // need to be available in the continuation block. Collect the types of these |
| // operands in preparation of creating PHI nodes. |
| SmallVector<llvm::Type *> continuationBlockPHITypes; |
| bool operandsProcessed = false; |
| unsigned numYields = 0; |
| for (Block &bb : region.getBlocks()) { |
| if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) { |
| if (!operandsProcessed) { |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| continuationBlockPHITypes.push_back( |
| moduleTranslation.convertType(yield->getOperand(i).getType())); |
| } |
| operandsProcessed = true; |
| } else { |
| assert(continuationBlockPHITypes.size() == yield->getNumOperands() && |
| "mismatching number of values yielded from the region"); |
| for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) { |
| llvm::Type *operandType = |
| moduleTranslation.convertType(yield->getOperand(i).getType()); |
| (void)operandType; |
| assert(continuationBlockPHITypes[i] == operandType && |
| "values of mismatching types yielded from the region"); |
| } |
| } |
| numYields++; |
| } |
| } |
| |
| // Insert PHI nodes in the continuation block for any values forwarded by the |
| // terminators in this region. |
| if (!continuationBlockPHITypes.empty()) |
| assert( |
| continuationBlockPHIs && |
| "expected continuation block PHIs if converted regions yield values"); |
| if (continuationBlockPHIs) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); |
| builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); |
| for (llvm::Type *ty : continuationBlockPHITypes) |
| continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); |
| } |
| |
| // Convert blocks one by one in topological order to ensure |
| // defs are converted before uses. |
| SetVector<Block *> blocks = getBlocksSortedByDominance(region); |
| for (Block *bb : blocks) { |
| llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); |
| // Retarget the branch of the entry block to the entry block of the |
| // converted region (regions are single-entry). |
| if (bb->isEntryBlock()) { |
| assert(sourceTerminator->getNumSuccessors() == 1 && |
| "provided entry block has multiple successors"); |
| assert(sourceTerminator->getSuccessor(0) == continuationBlock && |
| "ContinuationBlock is not the successor of the entry block"); |
| sourceTerminator->setSuccessor(0, llvmBB); |
| } |
| |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| if (failed( |
| moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { |
| bodyGenStatus = failure(); |
| return continuationBlock; |
| } |
| |
| // Special handling for `omp.yield` and `omp.terminator` (we may have more |
| // than one): they return the control to the parent OpenMP dialect operation |
| // so replace them with the branch to the continuation block. We handle this |
| // here to avoid relying inter-function communication through the |
| // ModuleTranslation class to set up the correct insertion point. This is |
| // also consistent with MLIR's idiom of handling special region terminators |
| // in the same code that handles the region-owning operation. |
| Operation *terminator = bb->getTerminator(); |
| if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) { |
| builder.CreateBr(continuationBlock); |
| |
| for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) |
| (*continuationBlockPHIs)[i]->addIncoming( |
| moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB); |
| } |
| } |
| // After all blocks have been traversed and values mapped, connect the PHI |
| // nodes to the results of preceding blocks. |
| LLVM::detail::connectPHINodes(region, moduleTranslation); |
| |
| // Remove the blocks and values defined in this region from the mapping since |
| // they are not visible outside of this region. This allows the same region to |
| // be converted several times, that is cloned, without clashes, and slightly |
| // speeds up the lookups. |
| moduleTranslation.forgetMapping(region); |
| |
| return continuationBlock; |
| } |
| |
| /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. |
| static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { |
| switch (kind) { |
| case omp::ClauseProcBindKind::Close: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; |
| case omp::ClauseProcBindKind::Master: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; |
| case omp::ClauseProcBindKind::Primary: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; |
| case omp::ClauseProcBindKind::Spread: |
| return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; |
| } |
| llvm_unreachable("Unknown ClauseProcBindKind kind"); |
| } |
| |
| /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // MasterOp has only one region associated with it. |
| auto ®ion = cast<omp::MasterOp>(opInst).getRegion(); |
| builder.restoreIP(codeGenIP); |
| convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, |
| bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( |
| ompLoc, bodyGenCB, finiCB)); |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'critical' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto criticalOp = cast<omp::CriticalOp>(opInst); |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // CriticalOp has only one region associated with it. |
| auto ®ion = cast<omp::CriticalOp>(opInst).getRegion(); |
| builder.restoreIP(codeGenIP); |
| convertOmpOpRegions(region, "omp.critical.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); |
| llvm::Constant *hint = nullptr; |
| |
| // If it has a name, it probably has a hint too. |
| if (criticalOp.getNameAttr()) { |
| // The verifiers in OpenMP Dialect guarentee that all the pointers are |
| // non-null |
| auto symbolRef = cast<SymbolRefAttr>(criticalOp.getNameAttr()); |
| auto criticalDeclareOp = |
| SymbolTable::lookupNearestSymbolFrom<omp::CriticalDeclareOp>(criticalOp, |
| symbolRef); |
| hint = llvm::ConstantInt::get( |
| llvm::Type::getInt32Ty(llvmContext), |
| static_cast<int>(criticalDeclareOp.getHintVal())); |
| } |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( |
| ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint)); |
| return success(); |
| } |
| |
| /// Populates `reductions` with reduction declarations used in the given loop. |
| template <typename T> |
| static void |
| collectReductionDecls(T loop, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductions) { |
| std::optional<ArrayAttr> attr = loop.getReductions(); |
| if (!attr) |
| return; |
| |
| reductions.reserve(reductions.size() + loop.getNumReductionVars()); |
| for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) { |
| reductions.push_back( |
| SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>( |
| loop, symbolRef)); |
| } |
| } |
| |
| /// Translates the blocks contained in the given region and appends them to at |
| /// the current insertion point of `builder`. The operations of the entry block |
| /// are appended to the current insertion block. If set, `continuationBlockArgs` |
| /// is populated with translated values that correspond to the values |
| /// omp.yield'ed from the region. |
| static LogicalResult inlineConvertOmpRegions( |
| Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) { |
| if (region.empty()) |
| return success(); |
| |
| // Special case for single-block regions that don't create additional blocks: |
| // insert operations without creating additional blocks. |
| if (llvm::hasSingleElement(region)) { |
| llvm::Instruction *potentialTerminator = |
| builder.GetInsertBlock()->empty() ? nullptr |
| : &builder.GetInsertBlock()->back(); |
| |
| if (potentialTerminator && potentialTerminator->isTerminator()) |
| potentialTerminator->removeFromParent(); |
| moduleTranslation.mapBlock(®ion.front(), builder.GetInsertBlock()); |
| |
| if (failed(moduleTranslation.convertBlock( |
| region.front(), /*ignoreArguments=*/true, builder))) |
| return failure(); |
| |
| // The continuation arguments are simply the translated terminator operands. |
| if (continuationBlockArgs) |
| llvm::append_range( |
| *continuationBlockArgs, |
| moduleTranslation.lookupValues(region.front().back().getOperands())); |
| |
| // Drop the mapping that is no longer necessary so that the same region can |
| // be processed multiple times. |
| moduleTranslation.forgetMapping(region); |
| |
| if (potentialTerminator && potentialTerminator->isTerminator()) |
| potentialTerminator->insertAfter(&builder.GetInsertBlock()->back()); |
| |
| return success(); |
| } |
| |
| LogicalResult bodyGenStatus = success(); |
| SmallVector<llvm::PHINode *> phis; |
| llvm::BasicBlock *continuationBlock = convertOmpOpRegions( |
| region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); |
| if (failed(bodyGenStatus)) |
| return failure(); |
| if (continuationBlockArgs) |
| llvm::append_range(*continuationBlockArgs, phis); |
| builder.SetInsertPoint(continuationBlock, |
| continuationBlock->getFirstInsertionPt()); |
| return success(); |
| } |
| |
| namespace { |
| /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to |
| /// store lambdas with capture. |
| using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *, |
| llvm::Value *&)>; |
| using OwningAtomicReductionGen = |
| std::function<llvm::OpenMPIRBuilder::InsertPointTy( |
| llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *, |
| llvm::Value *)>; |
| } // namespace |
| |
| /// Create an OpenMPIRBuilder-compatible reduction generator for the given |
| /// reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. |
| static OwningReductionGen |
| makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningReductionGen gen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, |
| llvm::Value *lhs, llvm::Value *rhs, |
| llvm::Value *&result) mutable { |
| Region &reductionRegion = decl.getReductionRegion(); |
| moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs); |
| moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(reductionRegion, |
| "omp.reduction.nonatomic.body", |
| builder, moduleTranslation, &phis))) |
| return llvm::OpenMPIRBuilder::InsertPointTy(); |
| assert(phis.size() == 1); |
| result = phis[0]; |
| return builder.saveIP(); |
| }; |
| return gen; |
| } |
| |
| /// Create an OpenMPIRBuilder-compatible atomic reduction generator for the |
| /// given reduction declaration. The generator uses `builder` but ignores its |
| /// insertion point. Returns null if there is no atomic region available in the |
| /// reduction declaration. |
| static OwningAtomicReductionGen |
| makeAtomicReductionGen(omp::DeclareReductionOp decl, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (decl.getAtomicReductionRegion().empty()) |
| return OwningAtomicReductionGen(); |
| |
| // The lambda is mutable because we need access to non-const methods of decl |
| // (which aren't actually mutating it), and we must capture decl by-value to |
| // avoid the dangling reference after the parent function returns. |
| OwningAtomicReductionGen atomicGen = |
| [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, |
| llvm::Value *lhs, llvm::Value *rhs) mutable { |
| Region &atomicRegion = decl.getAtomicReductionRegion(); |
| moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs); |
| moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs); |
| builder.restoreIP(insertPoint); |
| SmallVector<llvm::Value *> phis; |
| if (failed(inlineConvertOmpRegions(atomicRegion, |
| "omp.reduction.atomic.body", builder, |
| moduleTranslation, &phis))) |
| return llvm::OpenMPIRBuilder::InsertPointTy(); |
| assert(phis.empty()); |
| return builder.saveIP(); |
| }; |
| return atomicGen; |
| } |
| |
| /// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto orderedOp = cast<omp::OrderedOp>(opInst); |
| |
| omp::ClauseDepend dependType = *orderedOp.getDependTypeVal(); |
| bool isDependSource = dependType == omp::ClauseDepend::dependsource; |
| unsigned numLoops = *orderedOp.getNumLoopsVal(); |
| SmallVector<llvm::Value *> vecValues = |
| moduleTranslation.lookupValues(orderedOp.getDependVecVars()); |
| |
| size_t indexVecValues = 0; |
| while (indexVecValues < vecValues.size()) { |
| SmallVector<llvm::Value *> storeValues; |
| storeValues.reserve(numLoops); |
| for (unsigned i = 0; i < numLoops; i++) { |
| storeValues.push_back(vecValues[indexVecValues]); |
| indexVecValues++; |
| } |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( |
| ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); |
| } |
| return success(); |
| } |
| |
| /// Converts an OpenMP 'ordered_region' operation into LLVM IR using |
| /// OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst); |
| |
| // TODO: The code generation for ordered simd directive is not supported yet. |
| if (orderedRegionOp.getSimd()) |
| return failure(); |
| |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // OrderedOp has only one region associated with it. |
| auto ®ion = cast<omp::OrderedRegionOp>(opInst).getRegion(); |
| builder.restoreIP(codeGenIP); |
| convertOmpOpRegions(region, "omp.ordered.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP( |
| moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( |
| ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getSimd())); |
| return bodyGenStatus; |
| } |
| |
| static LogicalResult |
| convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| using StorableBodyGenCallbackTy = |
| llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
| |
| auto sectionsOp = cast<omp::SectionsOp>(opInst); |
| |
| // TODO: Support the following clauses: private, firstprivate, lastprivate, |
| // reduction, allocate |
| if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() || |
| !sectionsOp.getAllocateVars().empty() || |
| !sectionsOp.getAllocatorsVars().empty()) |
| return emitError(sectionsOp.getLoc()) |
| << "reduction and allocate clauses are not supported for sections " |
| "construct"; |
| |
| LogicalResult bodyGenStatus = success(); |
| SmallVector<StorableBodyGenCallbackTy> sectionCBs; |
| |
| for (Operation &op : *sectionsOp.getRegion().begin()) { |
| auto sectionOp = dyn_cast<omp::SectionOp>(op); |
| if (!sectionOp) // omp.terminator |
| continue; |
| |
| Region ®ion = sectionOp.getRegion(); |
| auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( |
| InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| builder.restoreIP(codeGenIP); |
| convertOmpOpRegions(region, "omp.section.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| sectionCBs.push_back(sectionCB); |
| } |
| |
| // No sections within omp.sections operation - skip generation. This situation |
| // is only possible if there is only a terminator operation inside the |
| // sections operation |
| if (sectionCBs.empty()) |
| return success(); |
| |
| assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin())); |
| |
| // TODO: Perform appropriate actions according to the data-sharing |
| // attribute (shared, private, firstprivate, ...) of variables. |
| // Currently defaults to shared. |
| auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, |
| llvm::Value &vPtr, |
| llvm::Value *&replacementValue) -> InsertPointTy { |
| replacementValue = &vPtr; |
| return codeGenIP; |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( |
| ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, |
| sectionsOp.getNowait())); |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| LogicalResult bodyGenStatus = success(); |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| builder.restoreIP(codegenIP); |
| convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| auto finiCB = [&](InsertPointTy codeGenIP) {}; |
| |
| // Handle copyprivate |
| Operation::operand_range cpVars = singleOp.getCopyprivateVars(); |
| std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs(); |
| llvm::SmallVector<llvm::Value *> llvmCPVars; |
| llvm::SmallVector<llvm::Function *> llvmCPFuncs; |
| for (size_t i = 0, e = cpVars.size(); i < e; ++i) { |
| llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i])); |
| auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>( |
| singleOp, cast<SymbolRefAttr>((*cpFuncs)[i])); |
| llvmCPFuncs.push_back( |
| moduleTranslation.lookupFunction(llvmFuncOp.getName())); |
| } |
| |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( |
| ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs)); |
| return bodyGenStatus; |
| } |
| |
| // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder |
| static LogicalResult |
| convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| LogicalResult bodyGenStatus = success(); |
| if (!op.getAllocatorsVars().empty() || op.getReductions()) |
| return op.emitError("unhandled clauses for translation to LLVM IR"); |
| |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| builder.restoreIP(codegenIP); |
| convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| llvm::Value *numTeamsLower = nullptr; |
| if (Value numTeamsLowerVar = op.getNumTeamsLower()) |
| numTeamsLower = moduleTranslation.lookupValue(numTeamsLowerVar); |
| |
| llvm::Value *numTeamsUpper = nullptr; |
| if (Value numTeamsUpperVar = op.getNumTeamsUpper()) |
| numTeamsUpper = moduleTranslation.lookupValue(numTeamsUpperVar); |
| |
| llvm::Value *threadLimit = nullptr; |
| if (Value threadLimitVar = op.getThreadLimit()) |
| threadLimit = moduleTranslation.lookupValue(threadLimitVar); |
| |
| llvm::Value *ifExpr = nullptr; |
| if (Value ifExprVar = op.getIfExpr()) |
| ifExpr = moduleTranslation.lookupValue(ifExprVar); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams( |
| ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr)); |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| LogicalResult bodyGenStatus = success(); |
| if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() || |
| taskOp.getInReductions() || taskOp.getPriority() || |
| !taskOp.getAllocateVars().empty()) { |
| return taskOp.emitError("unhandled clauses for translation to LLVM IR"); |
| } |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| builder.restoreIP(codegenIP); |
| convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| SmallVector<llvm::OpenMPIRBuilder::DependData> dds; |
| if (!taskOp.getDependVars().empty() && taskOp.getDepends()) { |
| for (auto dep : |
| llvm::zip(taskOp.getDependVars(), taskOp.getDepends()->getValue())) { |
| llvm::omp::RTLDependenceKindTy type; |
| switch ( |
| cast<mlir::omp::ClauseTaskDependAttr>(std::get<1>(dep)).getValue()) { |
| case mlir::omp::ClauseTaskDepend::taskdependin: |
| type = llvm::omp::RTLDependenceKindTy::DepIn; |
| break; |
| // The OpenMP runtime requires that the codegen for 'depend' clause for |
| // 'out' dependency kind must be the same as codegen for 'depend' clause |
| // with 'inout' dependency. |
| case mlir::omp::ClauseTaskDepend::taskdependout: |
| case mlir::omp::ClauseTaskDepend::taskdependinout: |
| type = llvm::omp::RTLDependenceKindTy::DepInOut; |
| break; |
| }; |
| llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep)); |
| llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal); |
| dds.emplace_back(dd); |
| } |
| } |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask( |
| ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), |
| moduleTranslation.lookupValue(taskOp.getFinalExpr()), |
| moduleTranslation.lookupValue(taskOp.getIfExpr()), dds)); |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| LogicalResult bodyGenStatus = success(); |
| if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) { |
| return tgOp.emitError("unhandled clauses for translation to LLVM IR"); |
| } |
| auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { |
| builder.restoreIP(codegenIP); |
| convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup( |
| ompLoc, allocaIP, bodyCB)); |
| return bodyGenStatus; |
| } |
| |
| /// Allocate space for privatized reduction variables. |
| template <typename T> |
| static void allocByValReductionVars( |
| T loop, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| SmallVectorImpl<llvm::Value *> &privateReductionVariables, |
| DenseMap<Value, llvm::Value *> &reductionVariableMap, |
| llvm::ArrayRef<bool> isByRefs) { |
| llvm::IRBuilderBase::InsertPointGuard guard(builder); |
| builder.restoreIP(allocaIP); |
| auto args = |
| loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); |
| |
| for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { |
| if (isByRefs[i]) |
| continue; |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| moduleTranslation.mapValue(args[i], var); |
| privateReductionVariables.push_back(var); |
| reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); |
| } |
| } |
| |
| /// Map input argument to all reduction initialization regions |
| template <typename T> |
| static void |
| mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, |
| SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, |
| unsigned i) { |
| // map input argument to the initialization region |
| mlir::omp::DeclareReductionOp &reduction = reductionDecls[i]; |
| Region &initializerRegion = reduction.getInitializerRegion(); |
| Block &entry = initializerRegion.front(); |
| assert(entry.getNumArguments() == 1 && |
| "the initialization region has one argument"); |
| |
| mlir::Value mlirSource = loop.getReductionVars()[i]; |
| llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource); |
| assert(llvmSource && "lookup reduction var"); |
| moduleTranslation.mapValue(entry.getArgument(0), llvmSource); |
| } |
| |
| /// Collect reduction info |
| template <typename T> |
| static void collectReductionInfo( |
| T loop, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| SmallVector<omp::DeclareReductionOp> &reductionDecls, |
| SmallVector<OwningReductionGen> &owningReductionGens, |
| SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens, |
| const SmallVector<llvm::Value *> &privateReductionVariables, |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) { |
| unsigned numReductions = loop.getNumReductionVars(); |
| |
| for (unsigned i = 0; i < numReductions; ++i) { |
| owningReductionGens.push_back( |
| makeReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| owningAtomicReductionGens.push_back( |
| makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); |
| } |
| |
| // Collect the reduction information. |
| reductionInfos.reserve(numReductions); |
| for (unsigned i = 0; i < numReductions; ++i) { |
| llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr; |
| if (owningAtomicReductionGens[i]) |
| atomicGen = owningAtomicReductionGens[i]; |
| llvm::Value *variable = |
| moduleTranslation.lookupValue(loop.getReductionVars()[i]); |
| reductionInfos.push_back( |
| {moduleTranslation.convertType(reductionDecls[i].getType()), variable, |
| privateReductionVariables[i], owningReductionGens[i], atomicGen}); |
| } |
| } |
| |
| /// handling of DeclareReductionOp's cleanup region |
| static LogicalResult |
| inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions, |
| llvm::ArrayRef<llvm::Value *> privateVariables, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, StringRef regionName, |
| bool shouldLoadCleanupRegionArg = true) { |
| for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) { |
| if (cleanupRegion->empty()) |
| continue; |
| |
| // map the argument to the cleanup region |
| Block &entry = cleanupRegion->front(); |
| |
| llvm::Instruction *potentialTerminator = |
| builder.GetInsertBlock()->empty() ? nullptr |
| : &builder.GetInsertBlock()->back(); |
| if (potentialTerminator && potentialTerminator->isTerminator()) |
| builder.SetInsertPoint(potentialTerminator); |
| llvm::Value *prviateVarValue = |
| shouldLoadCleanupRegionArg |
| ? builder.CreateLoad( |
| moduleTranslation.convertType(entry.getArgument(0).getType()), |
| privateVariables[i]) |
| : privateVariables[i]; |
| |
| moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue); |
| |
| if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder, |
| moduleTranslation))) |
| return failure(); |
| |
| // clear block argument mapping in case it needs to be re-created with a |
| // different source for another use of the same reduction decl |
| moduleTranslation.forgetMapping(*cleanupRegion); |
| } |
| return success(); |
| } |
| |
| static ArrayRef<bool> getIsByRef(std::optional<ArrayRef<bool>> attr) { |
| if (!attr) |
| return {}; |
| return *attr; |
| } |
| |
| /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto wsloopOp = cast<omp::WsloopOp>(opInst); |
| auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop()); |
| |
| llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionVarsByref()); |
| assert(isByRef.size() == wsloopOp.getNumReductionVars()); |
| |
| // Static is the default. |
| auto schedule = |
| wsloopOp.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); |
| |
| // Find the loop configuration. |
| llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[0]); |
| llvm::Type *ivType = step->getType(); |
| llvm::Value *chunk = nullptr; |
| if (wsloopOp.getScheduleChunkVar()) { |
| llvm::Value *chunkVar = |
| moduleTranslation.lookupValue(wsloopOp.getScheduleChunkVar()); |
| chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); |
| } |
| |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| collectReductionDecls(wsloopOp, reductionDecls); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| |
| SmallVector<llvm::Value *> privateReductionVariables; |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, |
| reductionVariableMap, isByRef); |
| |
| // Before the loop, store the initial values of reductions into reduction |
| // variables. Although this could be done after allocas, we don't want to mess |
| // up with the alloca insertion point. |
| ArrayRef<BlockArgument> reductionArgs = wsloopOp.getRegion().getArguments(); |
| for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) { |
| SmallVector<llvm::Value *> phis; |
| |
| // map block argument to initializer region |
| mapInitializationArg(wsloopOp, moduleTranslation, reductionDecls, i); |
| |
| if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), |
| "omp.reduction.neutral", builder, |
| moduleTranslation, &phis))) |
| return failure(); |
| assert(phis.size() == 1 && "expected one value to be yielded from the " |
| "reduction neutral element declaration region"); |
| if (isByRef[i]) { |
| // Allocate reduction variable (which is a pointer to the real reduction |
| // variable allocated in the inlined region) |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| // Store the result of the inlined region to the allocated reduction var |
| // ptr |
| builder.CreateStore(phis[0], var); |
| |
| privateReductionVariables.push_back(var); |
| moduleTranslation.mapValue(reductionArgs[i], phis[0]); |
| reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]); |
| } else { |
| // for by-ref case the store is inside of the reduction region |
| builder.CreateStore(phis[0], privateReductionVariables[i]); |
| // the rest was handled in allocByValReductionVars |
| } |
| |
| // forget the mapping for the initializer region because we might need a |
| // different mapping if this reduction declaration is re-used for a |
| // different variable |
| moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); |
| } |
| |
| // Store the mapping between reduction variables and their private copies on |
| // ModuleTranslation stack. It can be then recovered when translating |
| // omp.reduce operations in a separate call. |
| LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( |
| moduleTranslation, reductionVariableMap); |
| |
| // Set up the source location value for OpenMP runtime. |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| // Generator of the canonical loop body. |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| SmallVector<llvm::CanonicalLoopInfo *> loopInfos; |
| SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; |
| LogicalResult bodyGenStatus = success(); |
| auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { |
| // Make sure further conversions know about the induction variable. |
| moduleTranslation.mapValue( |
| loopOp.getRegion().front().getArgument(loopInfos.size()), iv); |
| |
| // Capture the body insertion point for use in nested loops. BodyIP of the |
| // CanonicalLoopInfo always points to the beginning of the entry block of |
| // the body. |
| bodyInsertPoints.push_back(ip); |
| |
| if (loopInfos.size() != loopOp.getNumLoops() - 1) |
| return; |
| |
| // Convert the body of the loop. |
| builder.restoreIP(ip); |
| convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| // Delegate actual loop construction to the OpenMP IRBuilder. |
| // TODO: this currently assumes omp.loop_nest is semantically similar to SCF |
| // loop, i.e. it has a positive step, uses signed integer semantics. |
| // Reconsider this code when the nested loop operation clearly supports more |
| // cases. |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { |
| llvm::Value *lowerBound = |
| moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); |
| llvm::Value *upperBound = |
| moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); |
| llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); |
| |
| // Make sure loop trip count are emitted in the preheader of the outermost |
| // loop at the latest so that they are all available for the new collapsed |
| // loop will be created below. |
| llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; |
| llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; |
| if (i != 0) { |
| loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); |
| computeIP = loopInfos.front()->getPreheaderIP(); |
| } |
| loopInfos.push_back(ompBuilder->createCanonicalLoop( |
| loc, bodyGen, lowerBound, upperBound, step, |
| /*IsSigned=*/true, loopOp.getInclusive(), computeIP)); |
| |
| if (failed(bodyGenStatus)) |
| return failure(); |
| } |
| |
| // Collapse loops. Store the insertion point because LoopInfos may get |
| // invalidated. |
| llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); |
| llvm::CanonicalLoopInfo *loopInfo = |
| ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); |
| |
| allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| |
| // TODO: Handle doacross loops when the ordered clause has a parameter. |
| bool isOrdered = wsloopOp.getOrderedVal().has_value(); |
| std::optional<omp::ScheduleModifier> scheduleModifier = |
| wsloopOp.getScheduleModifier(); |
| bool isSimd = wsloopOp.getSimdModifier(); |
| |
| ompBuilder->applyWorkshareLoop( |
| ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), |
| convertToScheduleKind(schedule), chunk, isSimd, |
| scheduleModifier == omp::ScheduleModifier::monotonic, |
| scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered); |
| |
| // Continue building IR after the loop. Note that the LoopInfo returned by |
| // `collapseLoops` points inside the outermost loop and is intended for |
| // potential further loop transformations. Use the insertion point stored |
| // before collapsing loops instead. |
| builder.restoreIP(afterIP); |
| |
| // Process the reductions if required. |
| if (wsloopOp.getNumReductionVars() == 0) |
| return success(); |
| |
| // Create the reduction generators. We need to own them here because |
| // ReductionInfo only accepts references to the generators. |
| SmallVector<OwningReductionGen> owningReductionGens; |
| SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; |
| collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls, |
| owningReductionGens, owningAtomicReductionGens, |
| privateReductionVariables, reductionInfos); |
| |
| // The call to createReductions below expects the block to have a |
| // terminator. Create an unreachable instruction to serve as terminator |
| // and remove it later. |
| llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); |
| builder.SetInsertPoint(tempTerminator); |
| llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = |
| ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, |
| isByRef, wsloopOp.getNowait()); |
| if (!contInsertPoint.getBlock()) |
| return wsloopOp->emitOpError() << "failed to convert reductions"; |
| auto nextInsertionPoint = |
| ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); |
| tempTerminator->eraseFromParent(); |
| builder.restoreIP(nextInsertionPoint); |
| |
| // after the workshare loop, deallocate private reduction variables |
| SmallVector<Region *> reductionRegions; |
| llvm::transform(reductionDecls, std::back_inserter(reductionRegions), |
| [](omp::DeclareReductionOp reductionDecl) { |
| return &reductionDecl.getCleanupRegion(); |
| }); |
| return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, |
| moduleTranslation, builder, |
| "omp.reduction.cleanup"); |
| } |
| |
| /// A RAII class that on construction replaces the region arguments of the |
| /// parallel op (which correspond to private variables) with the actual private |
| /// variables they correspond to. This prepares the parallel op so that it |
| /// matches what is expected by the OMPIRBuilder. |
| /// |
| /// On destruction, it restores the original state of the operation so that on |
| /// the MLIR side, the op is not affected by conversion to LLVM IR. |
| class OmpParallelOpConversionManager { |
| public: |
| OmpParallelOpConversionManager(omp::ParallelOp opInst) |
| : region(opInst.getRegion()), privateVars(opInst.getPrivateVars()), |
| privateArgBeginIdx(opInst.getNumReductionVars()), |
| privateArgEndIdx(privateArgBeginIdx + privateVars.size()) { |
| auto privateVarsIt = privateVars.begin(); |
| |
| for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx; |
| ++argIdx, ++privateVarsIt) |
| mlir::replaceAllUsesInRegionWith(region.getArgument(argIdx), |
| *privateVarsIt, region); |
| } |
| |
| ~OmpParallelOpConversionManager() { |
| auto privateVarsIt = privateVars.begin(); |
| |
| for (size_t argIdx = privateArgBeginIdx; argIdx < privateArgEndIdx; |
| ++argIdx, ++privateVarsIt) |
| mlir::replaceAllUsesInRegionWith(*privateVarsIt, |
| region.getArgument(argIdx), region); |
| } |
| |
| private: |
| Region ®ion; |
| OperandRange privateVars; |
| unsigned privateArgBeginIdx; |
| unsigned privateArgEndIdx; |
| }; |
| |
| /// Converts the OpenMP parallel operation to LLVM IR. |
| static LogicalResult |
| convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| OmpParallelOpConversionManager raii(opInst); |
| ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionVarsByref()); |
| assert(isByRef.size() == opInst.getNumReductionVars()); |
| |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| LogicalResult bodyGenStatus = success(); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // Collect reduction declarations |
| SmallVector<omp::DeclareReductionOp> reductionDecls; |
| collectReductionDecls(opInst, reductionDecls); |
| SmallVector<llvm::Value *> privateReductionVariables; |
| |
| auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { |
| // Allocate reduction vars |
| DenseMap<Value, llvm::Value *> reductionVariableMap; |
| allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP, |
| reductionDecls, privateReductionVariables, |
| reductionVariableMap, isByRef); |
| |
| // Initialize reduction vars |
| builder.restoreIP(allocaIP); |
| MutableArrayRef<BlockArgument> reductionArgs = |
| opInst.getRegion().getArguments().take_back( |
| opInst.getNumReductionVars()); |
| for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { |
| SmallVector<llvm::Value *> phis; |
| |
| // map the block argument |
| mapInitializationArg(opInst, moduleTranslation, reductionDecls, i); |
| if (failed(inlineConvertOmpRegions( |
| reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", |
| builder, moduleTranslation, &phis))) |
| bodyGenStatus = failure(); |
| assert(phis.size() == 1 && |
| "expected one value to be yielded from the " |
| "reduction neutral element declaration region"); |
| builder.restoreIP(allocaIP); |
| |
| if (isByRef[i]) { |
| // Allocate reduction variable (which is a pointer to the real reduciton |
| // variable allocated in the inlined region) |
| llvm::Value *var = builder.CreateAlloca( |
| moduleTranslation.convertType(reductionDecls[i].getType())); |
| // Store the result of the inlined region to the allocated reduction var |
| // ptr |
| builder.CreateStore(phis[0], var); |
| |
| privateReductionVariables.push_back(var); |
| moduleTranslation.mapValue(reductionArgs[i], phis[0]); |
| reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); |
| } else { |
| // for by-ref case the store is inside of the reduction init region |
| builder.CreateStore(phis[0], privateReductionVariables[i]); |
| // the rest is done in allocByValReductionVars |
| } |
| |
| // clear block argument mapping in case it needs to be re-created with a |
| // different source for another use of the same reduction decl |
| moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); |
| } |
| |
| // Store the mapping between reduction variables and their private copies on |
| // ModuleTranslation stack. It can be then recovered when translating |
| // omp.reduce operations in a separate call. |
| LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( |
| moduleTranslation, reductionVariableMap); |
| |
| // Save the alloca insertion point on ModuleTranslation stack for use in |
| // nested regions. |
| LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( |
| moduleTranslation, allocaIP); |
| |
| // ParallelOp has only one region associated with it. |
| builder.restoreIP(codeGenIP); |
| auto regionBlock = |
| convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, |
| moduleTranslation, bodyGenStatus); |
| |
| // Process the reductions if required. |
| if (opInst.getNumReductionVars() > 0) { |
| // Collect reduction info |
| SmallVector<OwningReductionGen> owningReductionGens; |
| SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens; |
| SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos; |
| collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls, |
| owningReductionGens, owningAtomicReductionGens, |
| privateReductionVariables, reductionInfos); |
| |
| // Move to region cont block |
| builder.SetInsertPoint(regionBlock->getTerminator()); |
| |
| // Generate reductions from info |
| llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); |
| builder.SetInsertPoint(tempTerminator); |
| |
| llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = |
| ompBuilder->createReductions(builder.saveIP(), allocaIP, |
| reductionInfos, isByRef, false); |
| if (!contInsertPoint.getBlock()) { |
| bodyGenStatus = opInst->emitOpError() << "failed to convert reductions"; |
| return; |
| } |
| |
| tempTerminator->eraseFromParent(); |
| builder.restoreIP(contInsertPoint); |
| } |
| }; |
| |
| SmallVector<omp::PrivateClauseOp> privatizerClones; |
| SmallVector<llvm::Value *> privateVariables; |
| |
| // TODO: Perform appropriate actions according to the data-sharing |
| // attribute (shared, private, firstprivate, ...) of variables. |
| // Currently shared and private are supported. |
| auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, |
| llvm::Value &, llvm::Value &vPtr, |
| llvm::Value *&replacementValue) -> InsertPointTy { |
| replacementValue = &vPtr; |
| |
| // If this is a private value, this lambda will return the corresponding |
| // mlir value and its `PrivateClauseOp`. Otherwise, empty values are |
| // returned. |
| auto [privVar, privatizerClone] = |
| [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> { |
| if (!opInst.getPrivateVars().empty()) { |
| auto privVars = opInst.getPrivateVars(); |
| auto privatizers = opInst.getPrivatizers(); |
| |
| for (auto [privVar, privatizerAttr] : |
| llvm::zip_equal(privVars, *privatizers)) { |
| // Find the MLIR private variable corresponding to the LLVM value |
| // being privatized. |
| llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar); |
| if (llvmPrivVar != &vPtr) |
| continue; |
| |
| SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr); |
| omp::PrivateClauseOp privatizer = |
| SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>( |
| opInst, privSym); |
| |
| // Clone the privatizer in case it is used by more than one parallel |
| // region. The privatizer is processed in-place (see below) before it |
| // gets inlined in the parallel region and therefore processing the |
| // original op is dangerous. |
| return {privVar, privatizer.clone()}; |
| } |
| } |
| |
| return {mlir::Value(), omp::PrivateClauseOp()}; |
| }(); |
| |
| if (privVar) { |
| Region &allocRegion = privatizerClone.getAllocRegion(); |
| |
| // If this is a `firstprivate` clause, prepare the `omp.private` op by: |
| if (privatizerClone.getDataSharingType() == |
| omp::DataSharingClauseType::FirstPrivate) { |
| auto oldAllocBackBlock = std::prev(allocRegion.end()); |
| omp::YieldOp oldAllocYieldOp = |
| llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator()); |
| |
| Region ©Region = privatizerClone.getCopyRegion(); |
| |
| mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext()); |
| // 1. Cloning the `copy` region to the end of the `alloc` region. |
| copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion, |
| allocRegion.end()); |
| |
| auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock); |
| // 2. Merging the last `alloc` block with the first block in the `copy` |
| // region clone. |
| // 3. Re-mapping the first argument of the `copy` region to be the |
| // argument of the `alloc` region and the second argument of the `copy` |
| // region to be the yielded value of the `alloc` region (this is the |
| // private clone of the privatized value). |
| copyCloneBuilder.mergeBlocks( |
| &*newCopyRegionFrontBlock, &*oldAllocBackBlock, |
| {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)}); |
| |
| // 4. The old terminator of the `alloc` region is not needed anymore, so |
| // delete it. |
| oldAllocYieldOp.erase(); |
| } |
| |
| // Replace the privatizer block argument with mlir value being privatized. |
| // This way, the body of the privatizer will be changed from using the |
| // region/block argument to the value being privatized. |
| auto allocRegionArg = allocRegion.getArgument(0); |
| replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion); |
| |
| auto oldIP = builder.saveIP(); |
| builder.restoreIP(allocaIP); |
| |
| SmallVector<llvm::Value *, 1> yieldedValues; |
| if (failed(inlineConvertOmpRegions(allocRegion, "omp.privatizer", builder, |
| moduleTranslation, &yieldedValues))) { |
| opInst.emitError("failed to inline `alloc` region of an `omp.private` " |
| "op in the parallel region"); |
| bodyGenStatus = failure(); |
| privatizerClone.erase(); |
| } else { |
| assert(yieldedValues.size() == 1); |
| replacementValue = yieldedValues.front(); |
| |
| // Keep the LLVM replacement value and the op clone in case we need to |
| // emit cleanup (i.e. deallocation) logic. |
| privateVariables.push_back(replacementValue); |
| privatizerClones.push_back(privatizerClone); |
| } |
| |
| builder.restoreIP(oldIP); |
| } |
| |
| return codeGenIP; |
| }; |
| |
| // TODO: Perform finalization actions for variables. This has to be |
| // called for variables which have destructors/finalizers. |
| auto finiCB = [&](InsertPointTy codeGenIP) { |
| InsertPointTy oldIP = builder.saveIP(); |
| builder.restoreIP(codeGenIP); |
| |
| // if the reduction has a cleanup region, inline it here to finalize the |
| // reduction variables |
| SmallVector<Region *> reductionCleanupRegions; |
| llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions), |
| [](omp::DeclareReductionOp reductionDecl) { |
| return &reductionDecl.getCleanupRegion(); |
| }); |
| if (failed(inlineOmpRegionCleanup( |
| reductionCleanupRegions, privateReductionVariables, |
| moduleTranslation, builder, "omp.reduction.cleanup"))) |
| bodyGenStatus = failure(); |
| |
| SmallVector<Region *> privateCleanupRegions; |
| llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions), |
| [](omp::PrivateClauseOp privatizer) { |
| return &privatizer.getDeallocRegion(); |
| }); |
| |
| if (failed(inlineOmpRegionCleanup( |
| privateCleanupRegions, privateVariables, moduleTranslation, builder, |
| "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false))) |
| bodyGenStatus = failure(); |
| |
| builder.restoreIP(oldIP); |
| }; |
| |
| llvm::Value *ifCond = nullptr; |
| if (auto ifExprVar = opInst.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| llvm::Value *numThreads = nullptr; |
| if (auto numThreadsVar = opInst.getNumThreadsVar()) |
| numThreads = moduleTranslation.lookupValue(numThreadsVar); |
| auto pbKind = llvm::omp::OMP_PROC_BIND_default; |
| if (auto bind = opInst.getProcBindVal()) |
| pbKind = getProcBindKind(*bind); |
| // TODO: Is the Parallel construct cancellable? |
| bool isCancellable = false; |
| |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| builder.restoreIP( |
| ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, |
| ifCond, numThreads, pbKind, isCancellable)); |
| |
| for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones) |
| privatizerClone.erase(); |
| |
| return bodyGenStatus; |
| } |
| |
| /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto simdOp = cast<omp::SimdOp>(opInst); |
| auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop()); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| // Generator of the canonical loop body. |
| // TODO: support error propagation in OpenMPIRBuilder and use it instead of |
| // relying on captured variables. |
| SmallVector<llvm::CanonicalLoopInfo *> loopInfos; |
| SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; |
| LogicalResult bodyGenStatus = success(); |
| auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { |
| // Make sure further conversions know about the induction variable. |
| moduleTranslation.mapValue( |
| loopOp.getRegion().front().getArgument(loopInfos.size()), iv); |
| |
| // Capture the body insertion point for use in nested loops. BodyIP of the |
| // CanonicalLoopInfo always points to the beginning of the entry block of |
| // the body. |
| bodyInsertPoints.push_back(ip); |
| |
| if (loopInfos.size() != loopOp.getNumLoops() - 1) |
| return; |
| |
| // Convert the body of the loop. |
| builder.restoreIP(ip); |
| convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, |
| moduleTranslation, bodyGenStatus); |
| }; |
| |
| // Delegate actual loop construction to the OpenMP IRBuilder. |
| // TODO: this currently assumes omp.loop_nest is semantically similar to SCF |
| // loop, i.e. it has a positive step, uses signed integer semantics. |
| // Reconsider this code when the nested loop operation clearly supports more |
| // cases. |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { |
| llvm::Value *lowerBound = |
| moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); |
| llvm::Value *upperBound = |
| moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); |
| llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); |
| |
| // Make sure loop trip count are emitted in the preheader of the outermost |
| // loop at the latest so that they are all available for the new collapsed |
| // loop will be created below. |
| llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; |
| llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; |
| if (i != 0) { |
| loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), |
| ompLoc.DL); |
| computeIP = loopInfos.front()->getPreheaderIP(); |
| } |
| loopInfos.push_back(ompBuilder->createCanonicalLoop( |
| loc, bodyGen, lowerBound, upperBound, step, |
| /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); |
| |
| if (failed(bodyGenStatus)) |
| return failure(); |
| } |
| |
| // Collapse loops. |
| llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); |
| llvm::CanonicalLoopInfo *loopInfo = |
| ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); |
| |
| llvm::ConstantInt *simdlen = nullptr; |
| if (std::optional<uint64_t> simdlenVar = simdOp.getSimdlen()) |
| simdlen = builder.getInt64(simdlenVar.value()); |
| |
| llvm::ConstantInt *safelen = nullptr; |
| if (std::optional<uint64_t> safelenVar = simdOp.getSafelen()) |
| safelen = builder.getInt64(safelenVar.value()); |
| |
| llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars; |
| ompBuilder->applySimd( |
| loopInfo, alignedVars, |
| simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr()) |
| : nullptr, |
| llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); |
| |
| builder.restoreIP(afterIP); |
| return success(); |
| } |
| |
| /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. |
| static llvm::AtomicOrdering |
| convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) { |
| if (!ao) |
| return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering |
| |
| switch (*ao) { |
| case omp::ClauseMemoryOrderKind::Seq_cst: |
| return llvm::AtomicOrdering::SequentiallyConsistent; |
| case omp::ClauseMemoryOrderKind::Acq_rel: |
| return llvm::AtomicOrdering::AcquireRelease; |
| case omp::ClauseMemoryOrderKind::Acquire: |
| return llvm::AtomicOrdering::Acquire; |
| case omp::ClauseMemoryOrderKind::Release: |
| return llvm::AtomicOrdering::Release; |
| case omp::ClauseMemoryOrderKind::Relaxed: |
| return llvm::AtomicOrdering::Monotonic; |
| } |
| llvm_unreachable("Unknown ClauseMemoryOrderKind kind"); |
| } |
| |
| /// Convert omp.atomic.read operation to LLVM IR. |
| static LogicalResult |
| convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| |
| auto readOp = cast<omp::AtomicReadOp>(opInst); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| |
| llvm::AtomicOrdering AO = convertAtomicOrdering(readOp.getMemoryOrderVal()); |
| llvm::Value *x = moduleTranslation.lookupValue(readOp.getX()); |
| llvm::Value *v = moduleTranslation.lookupValue(readOp.getV()); |
| |
| llvm::Type *elementType = |
| moduleTranslation.convertType(readOp.getElementType()); |
| |
| llvm::OpenMPIRBuilder::AtomicOpValue V = {v, elementType, false, false}; |
| llvm::OpenMPIRBuilder::AtomicOpValue X = {x, elementType, false, false}; |
| builder.restoreIP(ompBuilder->createAtomicRead(ompLoc, X, V, AO)); |
| return success(); |
| } |
| |
| /// Converts an omp.atomic.write operation to LLVM IR. |
| static LogicalResult |
| convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| auto writeOp = cast<omp::AtomicWriteOp>(opInst); |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrderVal()); |
| llvm::Value *expr = moduleTranslation.lookupValue(writeOp.getExpr()); |
| llvm::Value *dest = moduleTranslation.lookupValue(writeOp.getX()); |
| llvm::Type *ty = moduleTranslation.convertType(writeOp.getExpr().getType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue x = {dest, ty, /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| builder.restoreIP(ompBuilder->createAtomicWrite(ompLoc, x, expr, ao)); |
| return success(); |
| } |
| |
| /// Converts an LLVM dialect binary operation to the corresponding enum value |
| /// for `atomicrmw` supported binary operation. |
| llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) { |
| return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op) |
| .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; }) |
| .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; }) |
| .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; }) |
| .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; }) |
| .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; }) |
| .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; }) |
| .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; }) |
| .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; }) |
| .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; }) |
| .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP); |
| } |
| |
| /// Converts an OpenMP atomic update operation using OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // Convert values and types. |
| auto &innerOpList = opInst.getRegion().front().getOperations(); |
| bool isXBinopExpr{false}; |
| llvm::AtomicRMWInst::BinOp binop; |
| mlir::Value mlirExpr; |
| llvm::Value *llvmExpr = nullptr; |
| llvm::Value *llvmX = nullptr; |
| llvm::Type *llvmXElementType = nullptr; |
| if (innerOpList.size() == 2) { |
| // The two operations here are the update and the terminator. |
| // Since we can identify the update operation, there is a possibility |
| // that we can generate the atomicrmw instruction. |
| mlir::Operation &innerOp = *opInst.getRegion().front().begin(); |
| if (!llvm::is_contained(innerOp.getOperands(), |
| opInst.getRegion().getArgument(0))) { |
| return opInst.emitError("no atomic update operation with region argument" |
| " as operand found inside atomic.update region"); |
| } |
| binop = convertBinOpToAtomic(innerOp); |
| isXBinopExpr = innerOp.getOperand(0) == opInst.getRegion().getArgument(0); |
| mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0)); |
| llvmExpr = moduleTranslation.lookupValue(mlirExpr); |
| } else { |
| // Since the update region includes more than one operation |
| // we will resort to generating a cmpxchg loop. |
| binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; |
| } |
| llvmX = moduleTranslation.lookupValue(opInst.getX()); |
| llvmXElementType = moduleTranslation.convertType( |
| opInst.getRegion().getArgument(0).getType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| |
| llvm::AtomicOrdering atomicOrdering = |
| convertAtomicOrdering(opInst.getMemoryOrderVal()); |
| |
| // Generate update code. |
| LogicalResult updateGenStatus = success(); |
| auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( |
| llvm::Value *atomicx, |
| llvm::IRBuilder<> &builder) -> llvm::Value * { |
| Block &bb = *opInst.getRegion().begin(); |
| moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx); |
| moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock(bb, true, builder))) { |
| updateGenStatus = (opInst.emitError() |
| << "unable to convert update operation to llvm IR"); |
| return nullptr; |
| } |
| omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); |
| assert(yieldop && yieldop.getResults().size() == 1 && |
| "terminator must be omp.yield op and it must have exactly one " |
| "argument"); |
| return moduleTranslation.lookupValue(yieldop.getResults()[0]); |
| }; |
| |
| // Handle ambiguous alloca, if any. |
| auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(ompBuilder->createAtomicUpdate( |
| ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, |
| isXBinopExpr)); |
| return updateGenStatus; |
| } |
| |
| static LogicalResult |
| convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, |
| llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| mlir::Value mlirExpr; |
| bool isXBinopExpr = false, isPostfixUpdate = false; |
| llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP; |
| |
| omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp(); |
| omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp(); |
| |
| assert((atomicUpdateOp || atomicWriteOp) && |
| "internal op must be an atomic.update or atomic.write op"); |
| |
| if (atomicWriteOp) { |
| isPostfixUpdate = true; |
| mlirExpr = atomicWriteOp.getExpr(); |
| } else { |
| isPostfixUpdate = atomicCaptureOp.getSecondOp() == |
| atomicCaptureOp.getAtomicUpdateOp().getOperation(); |
| auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations(); |
| bool isRegionArgUsed{false}; |
| // Find the binary update operation that uses the region argument |
| // and get the expression to update |
| for (Operation &innerOp : innerOpList) { |
| if (innerOp.getNumOperands() == 2) { |
| binop = convertBinOpToAtomic(innerOp); |
| if (!llvm::is_contained(innerOp.getOperands(), |
| atomicUpdateOp.getRegion().getArgument(0))) |
| continue; |
| isRegionArgUsed = true; |
| isXBinopExpr = |
| innerOp.getNumOperands() > 0 && |
| innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0); |
| mlirExpr = |
| (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0)); |
| break; |
| } |
| } |
| if (!isRegionArgUsed) |
| return atomicUpdateOp.emitError( |
| "no atomic update operation with region argument" |
| " as operand found inside atomic.update region"); |
| } |
| |
| llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr); |
| llvm::Value *llvmX = |
| moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getX()); |
| llvm::Value *llvmV = |
| moduleTranslation.lookupValue(atomicCaptureOp.getAtomicReadOp().getV()); |
| llvm::Type *llvmXElementType = moduleTranslation.convertType( |
| atomicCaptureOp.getAtomicReadOp().getElementType()); |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicV = {llvmV, llvmXElementType, |
| /*isSigned=*/false, |
| /*isVolatile=*/false}; |
| |
| llvm::AtomicOrdering atomicOrdering = |
| convertAtomicOrdering(atomicCaptureOp.getMemoryOrderVal()); |
| |
| LogicalResult updateGenStatus = success(); |
| auto updateFn = [&](llvm::Value *atomicx, |
| llvm::IRBuilder<> &builder) -> llvm::Value * { |
| if (atomicWriteOp) |
| return moduleTranslation.lookupValue(atomicWriteOp.getExpr()); |
| Block &bb = *atomicUpdateOp.getRegion().begin(); |
| moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(), |
| atomicx); |
| moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); |
| if (failed(moduleTranslation.convertBlock(bb, true, builder))) { |
| updateGenStatus = (atomicUpdateOp.emitError() |
| << "unable to convert update operation to llvm IR"); |
| return nullptr; |
| } |
| omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator()); |
| assert(yieldop && yieldop.getResults().size() == 1 && |
| "terminator must be omp.yield op and it must have exactly one " |
| "argument"); |
| return moduleTranslation.lookupValue(yieldop.getResults()[0]); |
| }; |
| |
| // Handle ambiguous alloca, if any. |
| auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| builder.restoreIP(ompBuilder->createAtomicCapture( |
| ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, |
| binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); |
| return updateGenStatus; |
| } |
| |
| /// Converts an OpenMP Threadprivate operation into LLVM IR using |
| /// OpenMPIRBuilder. |
| static LogicalResult |
| convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| auto threadprivateOp = cast<omp::ThreadprivateOp>(opInst); |
| |
| Value symAddr = threadprivateOp.getSymAddr(); |
| auto *symOp = symAddr.getDefiningOp(); |
| if (!isa<LLVM::AddressOfOp>(symOp)) |
| return opInst.emitError("Addressing symbol not found"); |
| LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp); |
| |
| LLVM::GlobalOp global = |
| addressOfOp.getGlobal(moduleTranslation.symbolTable()); |
| llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global); |
| llvm::Type *type = globalValue->getValueType(); |
| llvm::TypeSize typeSize = |
| builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize( |
| type); |
| llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue()); |
| llvm::StringRef suffix = llvm::StringRef(".cache", 6); |
| std::string cacheName = (Twine(global.getSymName()).concat(suffix)).str(); |
| llvm::Value *callInst = |
| moduleTranslation.getOpenMPBuilder()->createCachedThreadPrivate( |
| ompLoc, globalValue, size, cacheName); |
| moduleTranslation.mapValue(opInst.getResult(0), callInst); |
| return success(); |
| } |
| |
| static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind |
| convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) { |
| switch (deviceClause) { |
| case mlir::omp::DeclareTargetDeviceType::host: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost; |
| break; |
| case mlir::omp::DeclareTargetDeviceType::nohost: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost; |
| break; |
| case mlir::omp::DeclareTargetDeviceType::any: |
| return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny; |
| break; |
| } |
| llvm_unreachable("unhandled device clause"); |
| } |
| |
| static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind |
| convertToCaptureClauseKind( |
| mlir::omp::DeclareTargetCaptureClause captureClasue) { |
| switch (captureClasue) { |
| case mlir::omp::DeclareTargetCaptureClause::to: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; |
| case mlir::omp::DeclareTargetCaptureClause::link: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; |
| case mlir::omp::DeclareTargetCaptureClause::enter: |
| return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; |
| } |
| llvm_unreachable("unhandled capture clause"); |
| } |
| |
| static llvm::SmallString<64> |
| getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp, |
| llvm::OpenMPIRBuilder &ompBuilder) { |
| llvm::SmallString<64> suffix; |
| llvm::raw_svector_ostream os(suffix); |
| if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) { |
| auto loc = globalOp->getLoc()->findInstanceOf<FileLineColLoc>(); |
| auto fileInfoCallBack = [&loc]() { |
| return std::pair<std::string, uint64_t>( |
| llvm::StringRef(loc.getFilename()), loc.getLine()); |
| }; |
| |
| os << llvm::format( |
| "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID); |
| } |
| os << "_decl_tgt_ref_ptr"; |
| |
| return suffix; |
| } |
| |
| static bool isDeclareTargetLink(mlir::Value value) { |
| if (auto addressOfOp = |
| llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) { |
| auto modOp = addressOfOp->getParentOfType<mlir::ModuleOp>(); |
| Operation *gOp = modOp.lookupSymbol(addressOfOp.getGlobalName()); |
| if (auto declareTargetGlobal = |
| llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp)) |
| if (declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::link) |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns the reference pointer generated by the lowering of the declare target |
| // operation in cases where the link clause is used or the to clause is used in |
| // USM mode. |
| static llvm::Value * |
| getRefPtrIfDeclareTarget(mlir::Value value, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| // An easier way to do this may just be to keep track of any pointer |
| // references and their mapping to their respective operation |
| if (auto addressOfOp = |
| llvm::dyn_cast_if_present<LLVM::AddressOfOp>(value.getDefiningOp())) { |
| if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>( |
| addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol( |
| addressOfOp.getGlobalName()))) { |
| |
| if (auto declareTargetGlobal = |
| llvm::dyn_cast<mlir::omp::DeclareTargetInterface>( |
| gOp.getOperation())) { |
| |
| // In this case, we must utilise the reference pointer generated by the |
| // declare target operation, similar to Clang |
| if ((declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::link) || |
| (declareTargetGlobal.getDeclareTargetCaptureClause() == |
| mlir::omp::DeclareTargetCaptureClause::to && |
| ompBuilder->Config.hasRequiresUnifiedSharedMemory())) { |
| llvm::SmallString<64> suffix = |
| getDeclareTargetRefPtrSuffix(gOp, *ompBuilder); |
| |
| if (gOp.getSymName().contains(suffix)) |
| return moduleTranslation.getLLVMModule()->getNamedValue( |
| gOp.getSymName()); |
| |
| return moduleTranslation.getLLVMModule()->getNamedValue( |
| (gOp.getSymName().str() + suffix.str()).str()); |
| } |
| } |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| // A small helper structure to contain data gathered |
| // for map lowering and coalese it into one area and |
| // avoiding extra computations such as searches in the |
| // llvm module for lowered mapped variables or checking |
| // if something is declare target (and retrieving the |
| // value) more than neccessary. |
| struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy { |
| llvm::SmallVector<bool, 4> IsDeclareTarget; |
| llvm::SmallVector<bool, 4> IsAMember; |
| llvm::SmallVector<mlir::Operation *, 4> MapClause; |
| llvm::SmallVector<llvm::Value *, 4> OriginalValue; |
| // Stripped off array/pointer to get the underlying |
| // element type |
| llvm::SmallVector<llvm::Type *, 4> BaseType; |
| |
| /// Append arrays in \a CurInfo. |
| void append(MapInfoData &CurInfo) { |
| IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(), |
| CurInfo.IsDeclareTarget.end()); |
| MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end()); |
| OriginalValue.append(CurInfo.OriginalValue.begin(), |
| CurInfo.OriginalValue.end()); |
| BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end()); |
| llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo); |
| } |
| }; |
| |
| uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) { |
| if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>( |
| arrTy.getElementType())) |
| return getArrayElementSizeInBits(nestedArrTy, dl); |
| return dl.getTypeSizeInBits(arrTy.getElementType()); |
| } |
| |
| // This function calculates the size to be offloaded for a specified type, given |
| // its associated map clause (which can contain bounds information which affects |
| // the total size), this size is calculated based on the underlying element type |
| // e.g. given a 1-D array of ints, we will calculate the size from the integer |
| // type * number of elements in the array. This size can be used in other |
| // calculations but is ultimately used as an argument to the OpenMP runtimes |
| // kernel argument structure which is generated through the combinedInfo data |
| // structures. |
| // This function is somewhat equivalent to Clang's getExprTypeSize inside of |
| // CGOpenMPRuntime.cpp. |
| llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type, |
| Operation *clauseOp, llvm::Value *basePointer, |
| llvm::Type *baseType, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives |
| // the size in inconsistent byte or bit format. |
| uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type); |
| if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) |
| underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl); |
| |
| if (auto memberClause = |
| mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) { |
| // This calculates the size to transfer based on bounds and the underlying |
| // element type, provided bounds have been specified (Fortran |
| // pointers/allocatables/target and arrays that have sections specified fall |
| // into this as well). |
| if (!memberClause.getBounds().empty()) { |
| llvm::Value *elementCount = builder.getInt64(1); |
| for (auto bounds : memberClause.getBounds()) { |
| if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>( |
| bounds.getDefiningOp())) { |
| // The below calculation for the size to be mapped calculated from the |
| // map.info's bounds is: (elemCount * [UB - LB] + 1), later we |
| // multiply by the underlying element types byte size to get the full |
| // size to be offloaded based on the bounds |
| elementCount = builder.CreateMul( |
| elementCount, |
| builder.CreateAdd( |
| builder.CreateSub( |
| moduleTranslation.lookupValue(boundOp.getUpperBound()), |
| moduleTranslation.lookupValue(boundOp.getLowerBound())), |
| builder.getInt64(1))); |
| } |
| } |
| |
| // The size in bytes x number of elements, the sizeInBytes stored is |
| // the underyling types size, e.g. if ptr<i32>, it'll be the i32's |
| // size, so we do some on the fly runtime math to get the size in |
| // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need |
| // some adjustment for members with more complex types. |
| return builder.CreateMul(elementCount, |
| builder.getInt64(underlyingTypeSzInBits / 8)); |
| } |
| } |
| |
| return builder.getInt64(underlyingTypeSzInBits / 8); |
| } |
| |
| void collectMapDataFromMapOperands(MapInfoData &mapData, |
| llvm::SmallVectorImpl<Value> &mapOperands, |
| LLVM::ModuleTranslation &moduleTranslation, |
| DataLayout &dl, |
| llvm::IRBuilderBase &builder) { |
| for (mlir::Value mapValue : mapOperands) { |
| if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>( |
| mapValue.getDefiningOp())) { |
| mlir::Value offloadPtr = |
| mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr(); |
| mapData.OriginalValue.push_back( |
| moduleTranslation.lookupValue(offloadPtr)); |
| mapData.Pointers.push_back(mapData.OriginalValue.back()); |
| |
| if (llvm::Value *refPtr = |
| getRefPtrIfDeclareTarget(offloadPtr, |
| moduleTranslation)) { // declare target |
| mapData.IsDeclareTarget.push_back(true); |
| mapData.BasePointers.push_back(refPtr); |
| } else { // regular mapped variable |
| mapData.IsDeclareTarget.push_back(false); |
| mapData.BasePointers.push_back(mapData.OriginalValue.back()); |
| } |
| |
| mapData.BaseType.push_back( |
| moduleTranslation.convertType(mapOp.getVarType())); |
| mapData.Sizes.push_back( |
| getSizeInBytes(dl, mapOp.getVarType(), mapOp, mapData.Pointers.back(), |
| mapData.BaseType.back(), builder, moduleTranslation)); |
| mapData.MapClause.push_back(mapOp.getOperation()); |
| mapData.Types.push_back( |
| llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value())); |
| mapData.Names.push_back(LLVM::createMappingInformation( |
| mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder())); |
| mapData.DevicePointers.push_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| |
| // Check if this is a member mapping and correctly assign that it is, if |
| // it is a member of a larger object. |
| // TODO: Need better handling of members, and distinguishing of members |
| // that are implicitly allocated on device vs explicitly passed in as |
| // arguments. |
| // TODO: May require some further additions to support nested record |
| // types, i.e. member maps that can have member maps. |
| mapData.IsAMember.push_back(false); |
| for (mlir::Value mapValue : mapOperands) { |
| if (auto map = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>( |
| mapValue.getDefiningOp())) { |
| for (auto member : map.getMembers()) { |
| if (member == mapOp) { |
| mapData.IsAMember.back() = true; |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| static int getMapDataMemberIdx(MapInfoData &mapData, |
| mlir::omp::MapInfoOp memberOp) { |
| auto *res = llvm::find(mapData.MapClause, memberOp); |
| assert(res != mapData.MapClause.end() && |
| "MapInfoOp for member not found in MapData, cannot return index"); |
| return std::distance(mapData.MapClause.begin(), res); |
| } |
| |
| static mlir::omp::MapInfoOp |
| getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) { |
| mlir::DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr(); |
| |
| // Only 1 member has been mapped, we can return it. |
| if (indexAttr.size() == 1) |
| if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>( |
| mapInfo.getMembers()[0].getDefiningOp())) |
| return mapOp; |
| |
| llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape(); |
| llvm::SmallVector<size_t> indices(shape[0]); |
| std::iota(indices.begin(), indices.end(), 0); |
| |
| llvm::sort(indices.begin(), indices.end(), |
| [&](const size_t a, const size_t b) { |
| auto indexValues = indexAttr.getValues<int32_t>(); |
| for (int i = 0; i < shape[1]; ++i) { |
| int aIndex = indexValues[a * shape[1] + i]; |
| int bIndex = indexValues[b * shape[1] + i]; |
| |
| if (aIndex == bIndex) |
| continue; |
| |
| if (aIndex != -1 && bIndex == -1) |
| return false; |
| |
| if (aIndex == -1 && bIndex != -1) |
| return true; |
| |
| // A is earlier in the record type layout than B |
| if (aIndex < bIndex) |
| return first; |
| |
| if (bIndex < aIndex) |
| return !first; |
| } |
| |
| // Iterated the entire list and couldn't make a decision, all |
| // elements were likely the same. Return false, since the sort |
| // comparator should return false for equal elements. |
| return false; |
| }); |
| |
| return llvm::cast<mlir::omp::MapInfoOp>( |
| mapInfo.getMembers()[indices.front()].getDefiningOp()); |
| } |
| |
| /// This function calculates the array/pointer offset for map data provided |
| /// with bounds operations, e.g. when provided something like the following: |
| /// |
| /// Fortran |
| /// map(tofrom: array(2:5, 3:2)) |
| /// or |
| /// C++ |
| /// map(tofrom: array[1:4][2:3]) |
| /// We must calculate the initial pointer offset to pass across, this function |
| /// performs this using bounds. |
| /// |
| /// NOTE: which while specified in row-major order it currently needs to be |
| /// flipped for Fortran's column order array allocation and access (as |
| /// opposed to C++'s row-major, hence the backwards processing where order is |
| /// important). This is likely important to keep in mind for the future when |
| /// we incorporate a C++ frontend, both frontends will need to agree on the |
| /// ordering of generated bounds operations (one may have to flip them) to |
| /// make the below lowering frontend agnostic. The offload size |
| /// calcualtion may also have to be adjusted for C++. |
| std::vector<llvm::Value *> |
| calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder, bool isArrayTy, |
| mlir::OperandRange bounds) { |
| std::vector<llvm::Value *> idx; |
| // There's no bounds to calculate an offset from, we can safely |
| // ignore and return no indices. |
| if (bounds.empty()) |
| return idx; |
| |
| // If we have an array type, then we have its type so can treat it as a |
| // normal GEP instruction where the bounds operations are simply indexes |
| // into the array. We currently do reverse order of the bounds, which |
| // I believe leans more towards Fortran's column-major in memory. |
| if (isArrayTy) { |
| idx.push_back(builder.getInt64(0)); |
| for (int i = bounds.size() - 1; i >= 0; --i) { |
| if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| idx.push_back(moduleTranslation.lookupValue(boundOp.getLowerBound())); |
| } |
| } |
| } else { |
| // If we do not have an array type, but we have bounds, then we're dealing |
| // with a pointer that's being treated like an array and we have the |
| // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base |
| // address (pointer pointing to the actual data) so we must caclulate the |
| // offset using a single index which the following two loops attempts to |
| // compute. |
| |
| // Calculates the size offset we need to make per row e.g. first row or |
| // column only needs to be offset by one, but the next would have to be |
| // the previous row/column offset multiplied by the extent of current row. |
| // |
| // For example ([1][10][100]): |
| // |
| // - First row/column we move by 1 for each index increment |
| // - Second row/column we move by 1 (first row/column) * 10 (extent/size of |
| // current) for 10 for each index increment |
| // - Third row/column we would move by 10 (second row/column) * |
| // (extent/size of current) 100 for 1000 for each index increment |
| std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)}; |
| for (size_t i = 1; i < bounds.size(); ++i) { |
| if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| dimensionIndexSizeOffset.push_back(builder.CreateMul( |
| moduleTranslation.lookupValue(boundOp.getExtent()), |
| dimensionIndexSizeOffset[i - 1])); |
| } |
| } |
| |
| // Now that we have calculated how much we move by per index, we must |
| // multiply each lower bound offset in indexes by the size offset we |
| // have calculated in the previous and accumulate the results to get |
| // our final resulting offset. |
| for (int i = bounds.size() - 1; i >= 0; --i) { |
| if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::MapBoundsOp>( |
| bounds[i].getDefiningOp())) { |
| if (idx.empty()) |
| idx.emplace_back(builder.CreateMul( |
| moduleTranslation.lookupValue(boundOp.getLowerBound()), |
| dimensionIndexSizeOffset[i])); |
| else |
| idx.back() = builder.CreateAdd( |
| idx.back(), builder.CreateMul(moduleTranslation.lookupValue( |
| boundOp.getLowerBound()), |
| dimensionIndexSizeOffset[i])); |
| } |
| } |
| } |
| |
| return idx; |
| } |
| |
| // This creates two insertions into the MapInfosTy data structure for the |
| // "parent" of a set of members, (usually a container e.g. |
| // class/structure/derived type) when subsequent members have also been |
| // explicitly mapped on the same map clause. Certain types, such as Fortran |
| // descriptors are mapped like this as well, however, the members are |
| // implicit as far as a user is concerned, but we must explicitly map them |
| // internally. |
| // |
| // This function also returns the memberOfFlag for this particular parent,
| // which is utilised in subsequent member mappings (by modifying their map
| // type with it) to indicate that a member is part of this parent and should
| // be treated by the runtime as such. Important to achieve the correct mapping.
| // |
| // This function borrows a lot from Clang's emitCombinedEntry function |
| // inside of CGOpenMPRuntime.cpp |
| static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( |
| LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, |
| llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, |
| uint64_t mapDataIndex, bool isTargetParams) { |
| // Map the first segment of our structure |
| combinedInfo.Types.emplace_back( |
| isTargetParams |
| ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
| : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE); |
| combinedInfo.DevicePointers.emplace_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| combinedInfo.Names.emplace_back(LLVM::createMappingInformation( |
| mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); |
| |
| // Calculate size of the parent object being mapped based on the |
| // addresses at runtime, highAddr - lowAddr = size. This of course |
| // doesn't factor in allocated data like pointers, hence the further |
| // processing of members specified by users, or in the case of |
| // Fortran pointers and allocatables, the mapping of the pointed to |
| // data by the descriptor (which itself, is a structure containing |
| // runtime information on the dynamically allocated data). |
| auto parentClause = |
| llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| llvm::Value *lowAddr, *highAddr; |
| if (!parentClause.getPartialMap()) { |
| lowAddr = builder.CreatePointerCast(mapData.Pointers[mapDataIndex], |
| builder.getPtrTy()); |
| highAddr = builder.CreatePointerCast( |
| builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex], |
| mapData.Pointers[mapDataIndex], 1), |
| builder.getPtrTy()); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); |
| } else { |
| auto mapOp = |
| mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| int firstMemberIdx = getMapDataMemberIdx( |
| mapData, getFirstOrLastMappedMemberPtr(mapOp, true)); |
| lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx], |
| builder.getPtrTy()); |
| int lastMemberIdx = getMapDataMemberIdx( |
| mapData, getFirstOrLastMappedMemberPtr(mapOp, false)); |
| highAddr = builder.CreatePointerCast( |
| builder.CreateGEP(mapData.BaseType[lastMemberIdx], |
| mapData.Pointers[lastMemberIdx], builder.getInt64(1)), |
| builder.getPtrTy()); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]); |
| } |
| |
| llvm::Value *size = builder.CreateIntCast( |
| builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr), |
| builder.getInt64Ty(), |
| /*isSigned=*/false); |
| combinedInfo.Sizes.push_back(size); |
| |
| // TODO: This will need to be expanded to include the whole host of logic for |
| // the map flags that Clang currently supports (e.g. it should take the map |
| // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some |
| // further case specific flag modifications). For the moment, it handles what |
| // we support as expected. |
| llvm::omp::OpenMPOffloadMappingFlags mapFlag = |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; |
| |
| llvm::omp::OpenMPOffloadMappingFlags memberOfFlag = |
| ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1); |
| ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); |
| |
| // This creates the initial MEMBER_OF mapping that consists of |
| // the parent/top level container (same as above effectively, except |
| // with a fixed initial compile time size and seperate maptype which |
| // indicates the true mape type (tofrom etc.). This parent mapping is |
| // only relevant if the structure in its totality is being mapped, |
| // otherwise the above suffices. |
| if (!parentClause.getPartialMap()) { |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.DevicePointers.emplace_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| combinedInfo.Names.emplace_back(LLVM::createMappingInformation( |
| mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]); |
| combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]); |
| } |
| return memberOfFlag; |
| } |
| |
| // The intent is to verify if the mapped data being passed is a |
| // pointer -> pointee that requires special handling in certain cases, |
| // e.g. applying the OMP_MAP_PTR_AND_OBJ map type. |
| // |
| // There may be a better way to verify this, but unfortunately with |
| // opaque pointers we lose the ability to easily check if something is |
| // a pointer whilst maintaining access to the underlying type. |
| static bool checkIfPointerMap(mlir::omp::MapInfoOp mapOp) { |
| // If we have a varPtrPtr field assigned then the underlying type is a pointer |
| if (mapOp.getVarPtrPtr()) |
| return true; |
| |
| // If the map data is declare target with a link clause, then it's represented |
| // as a pointer when we lower it to LLVM-IR even if at the MLIR level it has |
| // no relation to pointers. |
| if (isDeclareTargetLink(mapOp.getVarPtr())) |
| return true; |
| |
| return false; |
| } |
| |
| // This function is intended to add explicit mappings of members |
| static void processMapMembersWithParent( |
| LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, |
| llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, |
| uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) { |
| |
| auto parentClause = |
| llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| for (auto mappedMembers : parentClause.getMembers()) { |
| auto memberClause = |
| llvm::cast<mlir::omp::MapInfoOp>(mappedMembers.getDefiningOp()); |
| int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); |
| |
| assert(memberDataIdx >= 0 && "could not find mapped member of structure"); |
| |
| // Same MemberOfFlag to indicate its link with parent and other members |
| // of. |
| auto mapFlag = |
| llvm::omp::OpenMPOffloadMappingFlags(memberClause.getMapType().value()); |
| mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; |
| ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag); |
| if (checkIfPointerMap(memberClause)) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; |
| |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.DevicePointers.emplace_back( |
| llvm::OpenMPIRBuilder::DeviceInfoTy::None); |
| combinedInfo.Names.emplace_back( |
| LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder)); |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]); |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]); |
| combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]); |
| } |
| } |
| |
| static void |
| processIndividualMap(MapInfoData &mapData, size_t mapDataIdx, |
| llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, |
| bool isTargetParams, int mapDataParentIdx = -1) { |
| // Declare Target Mappings are excluded from being marked as |
| // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're |
| // marked with OMP_MAP_PTR_AND_OBJ instead. |
| auto mapFlag = mapData.Types[mapDataIdx]; |
| auto mapInfoOp = |
| llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIdx]); |
| |
| bool isPtrTy = checkIfPointerMap(mapInfoOp); |
| if (isPtrTy) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; |
| |
| if (isTargetParams && !mapData.IsDeclareTarget[mapDataIdx]) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; |
| |
| if (mapInfoOp.getMapCaptureType().value() == |
| mlir::omp::VariableCaptureKind::ByCopy && |
| !isPtrTy) |
| mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL; |
| |
| // if we're provided a mapDataParentIdx, then the data being mapped is |
| // part of a larger object (in a parent <-> member mapping) and in this |
| // case our BasePointer should be the parent. |
| if (mapDataParentIdx >= 0) |
| combinedInfo.BasePointers.emplace_back( |
| mapData.BasePointers[mapDataParentIdx]); |
| else |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]); |
| |
| combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]); |
| combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]); |
| combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]); |
| combinedInfo.Types.emplace_back(mapFlag); |
| combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]); |
| } |
| |
| static void processMapWithMembersOf( |
| LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, |
| llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData, |
| uint64_t mapDataIndex, bool isTargetParams) { |
| auto parentClause = |
| llvm::cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); |
| |
| // If we have a partial map (no parent referenced in the map clauses of the |
| // directive, only members) and only a single member, we do not need to bind |
| // the map of the member to the parent, we can pass the member seperately. |
| if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) { |
| auto memberClause = llvm::cast<mlir::omp::MapInfoOp>( |
| parentClause.getMembers()[0].getDefiningOp()); |
| int memberDataIdx = getMapDataMemberIdx(mapData, memberClause); |
| // Note: Clang treats arrays with explicit bounds that fall into this |
| // category as a parent with map case, however, it seems this isn't a |
| // requirement, and processing them as an individual map is fine. So, |
| // we will handle them as individual maps for the moment, as it's |
| // difficult for us to check this as we always require bounds to be |
| // specified currently and it's also marginally more optimal (single |
| // map rather than two). The difference may come from the fact that |
| // Clang maps array without bounds as pointers (which we do not |
| // currently do), whereas we treat them as arrays in all cases |
| // currently. |
| processIndividualMap(mapData, memberDataIdx, combinedInfo, isTargetParams, |
| mapDataIndex); |
| return; |
| } |
| |
| llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag = |
| mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl, |
| combinedInfo, mapData, mapDataIndex, isTargetParams); |
| processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl, |
| combinedInfo, mapData, mapDataIndex, |
| memberOfParentFlag); |
| } |
| |
| // This is a variation on Clang's GenerateOpenMPCapturedVars, which |
| // generates different operation (e.g. load/store) combinations for |
| // arguments to the kernel, based on map capture kinds which are then |
| // utilised in the combinedInfo in place of the original Map value. |
| static void |
| createAlteredByCaptureMap(MapInfoData &mapData, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder) { |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // if it's declare target, skip it, it's handled seperately. |
| if (!mapData.IsDeclareTarget[i]) { |
| auto mapOp = |
| mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(mapData.MapClause[i]); |
| mlir::omp::VariableCaptureKind captureKind = |
| mapOp.getMapCaptureType().value_or( |
| mlir::omp::VariableCaptureKind::ByRef); |
| bool isPtrTy = checkIfPointerMap(mapOp); |
| |
| // Currently handles array sectioning lowerbound case, but more |
| // logic may be required in the future. Clang invokes EmitLValue, |
| // which has specialised logic for special Clang types such as user |
| // defines, so it is possible we will have to extend this for |
| // structures or other complex types. As the general idea is that this |
| // function mimics some of the logic from Clang that we require for |
| // kernel argument passing from host -> device. |
| switch (captureKind) { |
| case mlir::omp::VariableCaptureKind::ByRef: { |
| llvm::Value *newV = mapData.Pointers[i]; |
| std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset( |
| moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(), |
| mapOp.getBounds()); |
| if (isPtrTy) |
| newV = builder.CreateLoad(builder.getPtrTy(), newV); |
| |
| if (!offsetIdx.empty()) |
| newV = builder.CreateInBoundsGEP(mapData.BaseType[i], newV, offsetIdx, |
| "array_offset"); |
| mapData.Pointers[i] = newV; |
| } break; |
| case mlir::omp::VariableCaptureKind::ByCopy: { |
| llvm::Type *type = mapData.BaseType[i]; |
| llvm::Value *newV; |
| if (mapData.Pointers[i]->getType()->isPointerTy()) |
| newV = builder.CreateLoad(type, mapData.Pointers[i]); |
| else |
| newV = mapData.Pointers[i]; |
| |
| if (!isPtrTy) { |
| auto curInsert = builder.saveIP(); |
| builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); |
| auto *memTempAlloc = |
| builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted"); |
| builder.restoreIP(curInsert); |
| |
| builder.CreateStore(newV, memTempAlloc); |
| newV = builder.CreateLoad(builder.getPtrTy(), memTempAlloc); |
| } |
| |
| mapData.Pointers[i] = newV; |
| mapData.BasePointers[i] = newV; |
| } break; |
| case mlir::omp::VariableCaptureKind::This: |
| case mlir::omp::VariableCaptureKind::VLAType: |
| mapData.MapClause[i]->emitOpError("Unhandled capture kind"); |
| break; |
| } |
| } |
| } |
| } |
| |
| // Generate all map related information and fill the combinedInfo. |
| static void genMapInfos(llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| DataLayout &dl, |
| llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, |
| MapInfoData &mapData, |
| const SmallVector<Value> &devPtrOperands = {}, |
| const SmallVector<Value> &devAddrOperands = {}, |
| bool isTargetParams = false) { |
| // We wish to modify some of the methods in which arguments are |
| // passed based on their capture type by the target region, this can |
| // involve generating new loads and stores, which changes the |
| // MLIR value to LLVM value mapping, however, we only wish to do this |
| // locally for the current function/target and also avoid altering |
| // ModuleTranslation, so we remap the base pointer or pointer stored |
| // in the map infos corresponding MapInfoData, which is later accessed |
| // by genMapInfos and createTarget to help generate the kernel and |
| // kernel arg structure. It primarily becomes relevant in cases like |
| // bycopy, or byref range'd arrays. In the default case, we simply |
| // pass thee pointer byref as both basePointer and pointer. |
| if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) |
| createAlteredByCaptureMap(mapData, moduleTranslation, builder); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| auto fail = [&combinedInfo]() -> void { |
| combinedInfo.BasePointers.clear(); |
| combinedInfo.Pointers.clear(); |
| combinedInfo.DevicePointers.clear(); |
| combinedInfo.Sizes.clear(); |
| combinedInfo.Types.clear(); |
| combinedInfo.Names.clear(); |
| }; |
| |
| // We operate under the assumption that all vectors that are |
| // required in MapInfoData are of equal lengths (either filled with |
| // default constructed data or appropiate information) so we can |
| // utilise the size from any component of MapInfoData, if we can't |
| // something is missing from the initial MapInfoData construction. |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // NOTE/TODO: We currently do not support arbitrary depth record |
| // type mapping. |
| if (mapData.IsAMember[i]) |
| continue; |
| |
| auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]); |
| if (!mapInfoOp.getMembers().empty()) { |
| processMapWithMembersOf(moduleTranslation, builder, *ompBuilder, dl, |
| combinedInfo, mapData, i, isTargetParams); |
| continue; |
| } |
| |
| processIndividualMap(mapData, i, combinedInfo, isTargetParams); |
| } |
| |
| auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) { |
| index = 0; |
| for (llvm::Value *basePtr : combinedInfo.BasePointers) { |
| if (basePtr == val) |
| return true; |
| index++; |
| } |
| return false; |
| }; |
| |
| auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void { |
| for (const auto &devOp : devOperands) { |
| // TODO: Only LLVMPointerTypes are handled. |
| if (!isa<LLVM::LLVMPointerType>(devOp.getType())) |
| return fail(); |
| |
| llvm::Value *mapOpValue = moduleTranslation.lookupValue(devOp); |
| |
| // Check if map info is already present for this entry. |
| unsigned infoIndex; |
| if (findMapInfo(mapOpValue, infoIndex)) { |
| combinedInfo.Types[infoIndex] |= |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; |
| combinedInfo.DevicePointers[infoIndex] = devOpType; |
| } else { |
| combinedInfo.BasePointers.emplace_back(mapOpValue); |
| combinedInfo.Pointers.emplace_back(mapOpValue); |
| combinedInfo.DevicePointers.emplace_back(devOpType); |
| combinedInfo.Names.emplace_back( |
| LLVM::createMappingInformation(devOp.getLoc(), *ompBuilder)); |
| combinedInfo.Types.emplace_back( |
| llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); |
| combinedInfo.Sizes.emplace_back(builder.getInt64(0)); |
| } |
| } |
| }; |
| |
| addDevInfos(devPtrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer); |
| addDevInfos(devAddrOperands, llvm::OpenMPIRBuilder::DeviceInfoTy::Address); |
| } |
| |
| static LogicalResult |
| convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| llvm::Value *ifCond = nullptr; |
| int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF; |
| SmallVector<Value> mapOperands; |
| SmallVector<Value> useDevPtrOperands; |
| SmallVector<Value> useDevAddrOperands; |
| llvm::omp::RuntimeFunction RTLFn; |
| DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>()); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| LogicalResult result = |
| llvm::TypeSwitch<Operation *, LogicalResult>(op) |
| .Case([&](omp::TargetDataOp dataOp) { |
| if (auto ifExprVar = dataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| |
| if (auto devId = dataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| mapOperands = dataOp.getMapOperands(); |
| useDevPtrOperands = dataOp.getUseDevicePtr(); |
| useDevAddrOperands = dataOp.getUseDeviceAddr(); |
| return success(); |
| }) |
| .Case([&](omp::TargetEnterDataOp enterDataOp) { |
| if (enterDataOp.getNowait()) |
| return (LogicalResult)(enterDataOp.emitError( |
| "`nowait` is not supported yet")); |
| |
| if (auto ifExprVar = enterDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| |
| if (auto devId = enterDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper; |
| mapOperands = enterDataOp.getMapOperands(); |
| return success(); |
| }) |
| .Case([&](omp::TargetExitDataOp exitDataOp) { |
| if (exitDataOp.getNowait()) |
| return (LogicalResult)(exitDataOp.emitError( |
| "`nowait` is not supported yet")); |
| |
| if (auto ifExprVar = exitDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| |
| if (auto devId = exitDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper; |
| mapOperands = exitDataOp.getMapOperands(); |
| return success(); |
| }) |
| .Case([&](omp::TargetUpdateOp updateDataOp) { |
| if (updateDataOp.getNowait()) |
| return (LogicalResult)(updateDataOp.emitError( |
| "`nowait` is not supported yet")); |
| |
| if (auto ifExprVar = updateDataOp.getIfExpr()) |
| ifCond = moduleTranslation.lookupValue(ifExprVar); |
| |
| if (auto devId = updateDataOp.getDevice()) |
| if (auto constOp = |
| dyn_cast<LLVM::ConstantOp>(devId.getDefiningOp())) |
| if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue())) |
| deviceID = intAttr.getInt(); |
| |
| RTLFn = llvm::omp::OMPRTL___tgt_target_data_update_mapper; |
| mapOperands = updateDataOp.getMapOperands(); |
| return success(); |
| }) |
| .Default([&](Operation *op) { |
| return op->emitError("unsupported OpenMP operation: ") |
| << op->getName(); |
| }); |
| |
| if (failed(result)) |
| return failure(); |
| |
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
| |
| MapInfoData mapData; |
| collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL, |
| builder); |
| |
| // Fill up the arrays with all the mapped variables. |
| llvm::OpenMPIRBuilder::MapInfosTy combinedInfo; |
| auto genMapInfoCB = |
| [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { |
| builder.restoreIP(codeGenIP); |
| if (auto dataOp = dyn_cast<omp::TargetDataOp>(op)) { |
| genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData, |
| useDevPtrOperands, useDevAddrOperands); |
| } else { |
| genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData); |
| } |
| return combinedInfo; |
| }; |
| |
| llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true, |
| /*SeparateBeginEndCalls=*/true); |
| |
| using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; |
| LogicalResult bodyGenStatus = success(); |
| auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) { |
| assert(isa<omp::TargetDataOp>(op) && |
| "BodyGen requested for non TargetDataOp"); |
| Region ®ion = cast<omp::TargetDataOp>(op).getRegion(); |
| switch (bodyGenType) { |
| case BodyGenTy::Priv: |
| // Check if any device ptr/addr info is available |
| if (!info.DevicePtrInfoMap.empty()) { |
| builder.restoreIP(codeGenIP); |
| unsigned argIndex = 0; |
| for (auto &devPtrOp : useDevPtrOperands) { |
| llvm::Value *mapOpValue = moduleTranslation.lookupValue(devPtrOp); |
| const auto &arg = region.front().getArgument(argIndex); |
| moduleTranslation.mapValue(arg, |
| info.DevicePtrInfoMap[mapOpValue].second); |
| argIndex++; |
| } |
| |
| for (auto &devAddrOp : useDevAddrOperands) { |
| llvm::Value *mapOpValue = moduleTranslation.lookupValue(devAddrOp); |
| const auto &arg = region.front().getArgument(argIndex); |
| auto *LI = builder.CreateLoad( |
| builder.getPtrTy(), info.DevicePtrInfoMap[mapOpValue].second); |
| moduleTranslation.mapValue(arg, LI); |
| argIndex++; |
| } |
| |
| bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", |
| builder, moduleTranslation); |
| } |
| break; |
| case BodyGenTy::DupNoPriv: |
| break; |
| case BodyGenTy::NoPriv: |
| // If device info is available then region has already been generated |
| if (info.DevicePtrInfoMap.empty()) { |
| builder.restoreIP(codeGenIP); |
| bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", |
| builder, moduleTranslation); |
| } |
| break; |
| } |
| return builder.saveIP(); |
| }; |
| |
| llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); |
| llvm::OpenMPIRBuilder::InsertPointTy allocaIP = |
| findAllocaInsertPoint(builder, moduleTranslation); |
| if (isa<omp::TargetDataOp>(op)) { |
| builder.restoreIP(ompBuilder->createTargetData( |
| ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, |
| info, genMapInfoCB, nullptr, bodyGenCB)); |
| } else { |
| builder.restoreIP(ompBuilder->createTargetData( |
| ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, |
| info, genMapInfoCB, &RTLFn)); |
| } |
| |
| return bodyGenStatus; |
| } |
| |
| /// Lowers the FlagsAttr which is applied to the module on the device |
| /// pass when offloading, this attribute contains OpenMP RTL globals that can |
| /// be passed as flags to the frontend, otherwise they are set to default |
| LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, |
| LLVM::ModuleTranslation &moduleTranslation) { |
| if (!cast<mlir::ModuleOp>(op)) |
| return failure(); |
| |
| llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); |
| |
| ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device", |
| attribute.getOpenmpDeviceVersion()); |
| |
| if (attribute.getNoGpuLib()) |
| return success(); |
| |
| ompBuilder->createGlobalFlag( |
| attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/, |
| "__omp_rtl_debug_kind"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/ |
| , |
| "__omp_rtl_assume_teams_oversubscription"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/ |
| , |
| "__omp_rtl_assume_threads_oversubscription"); |
| ompBuilder->createGlobalFlag( |
| attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/, |
| "__omp_rtl_assume_no_thread_state"); |
| ompBuilder->createGlobalFlag( |
| attribute |
| .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/ |
| , |
| "__omp_rtl_assume_no_nested_parallelism"); |
| return success(); |
| } |
| |
| static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, |
| omp::TargetOp targetOp, |
| llvm::StringRef parentName = "") { |
| auto fileLoc = targetOp.getLoc()->findInstanceOf<FileLineColLoc>(); |
| |
| assert(fileLoc && "No file found from location"); |
| StringRef fileName = fileLoc.getFilename().getValue(); |
| |
| llvm::sys::fs::UniqueID id; |
| if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { |
| targetOp.emitError("Unable to get unique ID for file"); |
| return false; |
| } |
| |
| uint64_t line = fileLoc.getLine(); |
| targetInfo = llvm::TargetRegionEntryInfo(parentName, id.getDevice(), |
| id.getFile(), line); |
| return true; |
| } |
| |
| static bool targetOpSupported(Operation &opInst) { |
| auto targetOp = cast<omp::TargetOp>(opInst); |
| if (targetOp.getIfExpr()) { |
| opInst.emitError("If clause not yet supported"); |
| return false; |
| } |
| |
| if (targetOp.getDevice()) { |
| opInst.emitError("Device clause not yet supported"); |
| return false; |
| } |
| |
| if (targetOp.getThreadLimit()) { |
| opInst.emitError("Thread limit clause not yet supported"); |
| return false; |
| } |
| |
| if (targetOp.getNowait()) { |
| opInst.emitError("Nowait clause not yet supported"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static void |
| handleDeclareTargetMapVar(MapInfoData &mapData, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase &builder) { |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) { |
| // In the case of declare target mapped variables, the basePointer is |
| // the reference pointer generated by the convertDeclareTargetAttr |
| // method. Whereas the kernelValue is the original variable, so for |
| // the device we must replace all uses of this original global variable |
| // (stored in kernelValue) with the reference pointer (stored in |
| // basePointer for declare target mapped variables), as for device the |
| // data is mapped into this reference pointer and should be loaded |
| // from it, the original variable is discarded. On host both exist and |
| // metadata is generated (elsewhere in the convertDeclareTargetAttr) |
| // function to link the two variables in the runtime and then both the |
| // reference pointer and the pointer are assigned in the kernel argument |
| // structure for the host. |
| if (mapData.IsDeclareTarget[i]) { |
| // The users iterator will get invalidated if we modify an element, |
| // so we populate this vector of uses to alter each user on an individual |
| // basis to emit its own load (rather than one load for all). |
| llvm::SmallVector<llvm::User *> userVec; |
| for (llvm::User *user : mapData.OriginalValue[i]->users()) |
| userVec.push_back(user); |
| |
| for (llvm::User *user : userVec) { |
| if (auto *insn = dyn_cast<llvm::Instruction>(user)) { |
| auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(), |
| mapData.BasePointers[i]); |
| load->moveBefore(insn); |
| user->replaceUsesOfWith(mapData.OriginalValue[i], load); |
| } |
| } |
| } |
| } |
| } |
| |
| // The createDeviceArgumentAccessor function generates |
| // instructions for retrieving (acessing) kernel |
| // arguments inside of the device kernel for use by |
| // the kernel. This enables different semantics such as |
| // the creation of temporary copies of data allowing |
| // semantics like read-only/no host write back kernel |
| // arguments. |
| // |
| // This currently implements a very light version of Clang's |
| // EmitParmDecl's handling of direct argument handling as well |
| // as a portion of the argument access generation based on |
| // capture types found at the end of emitOutlinedFunctionPrologue |
| // in Clang. The indirect path handling of EmitParmDecl's may be |
| // required for future work, but a direct 1-to-1 copy doesn't seem |
| // possible as the logic is rather scattered throughout Clang's |
| // lowering and perhaps we wish to deviate slightly. |
| // |
| // \param mapData - A container containing vectors of information |
| // corresponding to the input argument, which should have a |
| // corresponding entry in the MapInfoData containers |
| // OrigialValue's. |
| // \param arg - This is the generated kernel function argument that |
| // corresponds to the passed in input argument. We generated different |
| // accesses of this Argument, based on capture type and other Input |
| // related information. |
| // \param input - This is the host side value that will be passed to |
| // the kernel i.e. the kernel input, we rewrite all uses of this within |
| // the kernel (as we generate the kernel body based on the target's region |
| // which maintians references to the original input) to the retVal argument |
| // apon exit of this function inside of the OMPIRBuilder. This interlinks |
| // the kernel argument to future uses of it in the function providing |
| // appropriate "glue" instructions inbetween. |
| // \param retVal - This is the value that all uses of input inside of the |
| // kernel will be re-written to, the goal of this function is to generate |
| // an appropriate location for the kernel argument to be accessed from, |
| // e.g. ByRef will result in a temporary allocation location and then |
| // a store of the kernel argument into this allocated memory which |
| // will then be loaded from, ByCopy will use the allocated memory |
| // directly. |
| static llvm::IRBuilderBase::InsertPoint |
| createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, |
| llvm::Value *input, llvm::Value *&retVal, |
| llvm::IRBuilderBase &builder, |
| llvm::OpenMPIRBuilder &ompBuilder, |
| LLVM::ModuleTranslation &moduleTranslation, |
| llvm::IRBuilderBase::InsertPoint allocaIP, |
| llvm::IRBuilderBase::InsertPoint codeGenIP) { |
| builder.restoreIP(allocaIP); |
| |
| mlir::omp::VariableCaptureKind capture = |
| mlir::omp::VariableCaptureKind::ByRef; |
| |
| // Find the associated MapInfoData entry for the current input |
| for (size_t i = 0; i < mapData.MapClause.size(); ++i) |
| if (mapData.OriginalValue[i] == input) { |
| if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>( |
| mapData.MapClause[i])) { |
| capture = mapOp.getMapCaptureType().value_or( |
| mlir::omp::VariableCaptureKind::ByRef); |
| } |
| |
| break; |
| } |
| |
| unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace(); |
| unsigned int defaultAS = |
| ompBuilder.M.getDataLayout().getProgramAddressSpace(); |
| |
| // Create the alloca for the argument the current point. |
| llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS); |
| |
| if (allocaAS != defaultAS && arg.getType()->isPointerTy()) |
| v = builder.CreatePointerBitCastOrAddrSpaceCast( |
| v, arg.getType()->getPointerTo(defaultAS)); |
| |
| builder.CreateStore(&arg, v); |
| |
| builder.restoreIP(codeGenIP); |
| |
| switch (capture) { |
| case mlir::omp::VariableCaptureKind::ByCopy: { |
| retVal = v; |
| break; |
| } |
| case mlir::omp::VariableCaptureKind::ByRef: { |
| retVal = builder.CreateAlignedLoad( |
| v->getType(), v, |
| ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType())); |
| break; |
| } |
| case mlir::omp::VariableCaptureKind::This: |
| case mlir::omp::VariableCaptureKind::VLAType: |
| assert(false && "Currently unsupported capture kind"); |
| break; |
| } |
| |
| return builder.saveIP(); |
| } |
| |
| /// Translates an `omp.target` operation to LLVM IR via
| /// OpenMPIRBuilder::createTarget.
| ///
| /// Three callbacks are handed to `createTarget`:
| ///  - `bodyCB` copies the "target-cpu"/"target-features" string attributes of
| ///    the parent function onto the function that holds the region body, binds
| ///    each region block argument to the translated `var_ptr` of the matching
| ///    map operand and then converts the region itself.
| ///  - `genMapInfoCB` fills `combinedInfos` from `mapData` through genMapInfos.
| ///  - `argAccessorCB` chooses how a kernel argument is read: unchanged when
| ///    not compiling for the target device, through
| ///    createDeviceArgumentAccessor otherwise.
| ///
| /// \param opInst            the `omp.target` operation to translate.
| /// \param builder           IR builder positioned where the target construct
| ///                          is emitted.
| /// \param moduleTranslation translation state used for value/function lookup.
| /// \returns failure if the op is rejected by targetOpSupported or if no unique
| /// entry info can be computed; otherwise `bodyGenStatus`, the status handed to
| /// the region conversion.
| static LogicalResult
| convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
|                  LLVM::ModuleTranslation &moduleTranslation) {
|
|   if (!targetOpSupported(opInst))
|     return failure();
|
|   auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
|   auto targetOp = cast<omp::TargetOp>(opInst);
|   auto &targetRegion = targetOp.getRegion();
|   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
|   SmallVector<Value> mapOperands = targetOp.getMapOperands();
|
|   // Passed by reference into the region conversion performed by bodyCB and
|   // returned to the caller once createTarget has run.
|   LogicalResult bodyGenStatus = success();
|   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|   auto bodyCB = [&](InsertPointTy allocaIP,
|                     InsertPointTy codeGenIP) -> InsertPointTy {
|     // Forward target-cpu and target-features function attributes from the
|     // original function to the new outlined function.
|     llvm::Function *llvmParentFn =
|         moduleTranslation.lookupFunction(parentFn.getName());
|     llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
|     assert(llvmParentFn && llvmOutlinedFn &&
|            "Both parent and outlined functions must exist at this point");
|
|     if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
|         attr.isStringAttribute())
|       llvmOutlinedFn->addFnAttr(attr);
|
|     if (auto attr = llvmParentFn->getFnAttribute("target-features");
|         attr.isStringAttribute())
|       llvmOutlinedFn->addFnAttr(attr);
|
|     builder.restoreIP(codeGenIP);
|     unsigned argIndex = 0;
|     for (Value mapOp : mapOperands) {
|       // The result is dereferenced unconditionally, so use cast<> instead of
|       // dyn_cast<>: a map operand that is not produced by an omp.map.info op
|       // trips the cast assertion rather than dereferencing a null op.
|       auto mapInfoOp =
|           mlir::cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
|       llvm::Value *mapOpValue =
|           moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
|       // The i-th region block argument stands for the i-th map operand.
|       BlockArgument arg = targetRegion.front().getArgument(argIndex);
|       moduleTranslation.mapValue(arg, mapOpValue);
|       argIndex++;
|     }
|     llvm::BasicBlock *exitBlock = convertOmpOpRegions(
|         targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
|     builder.SetInsertPoint(exitBlock);
|     return builder.saveIP();
|   };
|
|   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|   StringRef parentName = parentFn.getName();
|
|   llvm::TargetRegionEntryInfo entryInfo;
|
|   if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
|     return failure();
|
|   // Default team/thread counts forwarded to createTarget.
|   int32_t defaultValTeams = -1;
|   int32_t defaultValThreads = 0;
|
|   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|       findAllocaInsertPoint(builder, moduleTranslation);
|
|   MapInfoData mapData;
|   collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
|                                 builder);
|
|   // Owned here and returned by reference from genMapInfoCB.
|   llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
|   auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
|       -> llvm::OpenMPIRBuilder::MapInfosTy & {
|     builder.restoreIP(codeGenIP);
|     genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
|                 true);
|     return combinedInfos;
|   };
|
|   auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
|                            llvm::Value *&retVal, InsertPointTy allocaIP,
|                            InsertPointTy codeGenIP) {
|     llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|     // We just return the unaltered argument for the host function
|     // for now, some alterations may be required in the future to
|     // keep host fallback functions working identically to the device
|     // version (e.g. pass ByCopy values should be treated as such on
|     // host and device, currently not always the case)
|     if (!ompBuilder->Config.isTargetDevice()) {
|       retVal = cast<llvm::Value>(&arg);
|       return codeGenIP;
|     }
|
|     return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
|                                         *ompBuilder, moduleTranslation,
|                                         allocaIP, codeGenIP);
|   };
|
|   llvm::SmallVector<llvm::Value *, 4> kernelInput;
|   for (size_t i = 0; i < mapOperands.size(); ++i) {
|     // declare target arguments are not passed to kernels as arguments
|     // TODO: We currently do not handle cases where a member is explicitly
|     // passed in as an argument, this will likely need to be handled in
|     // the near future, rather than using IsAMember, it may be better to
|     // test if the relevant BlockArg is used within the target region and
|     // then use that as a basis for exclusion in the kernel inputs.
|     if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
|       kernelInput.push_back(mapData.OriginalValue[i]);
|   }
|
|   // Emit the target construct and continue emitting IR after it.
|   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
|       ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
|       defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
|
|   // Remap access operations to declare target reference pointers for the
|   // device, essentially generating extra loadop's as necessary
|   if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
|     handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
|
|   return bodyGenStatus;
| }
| |
| /// Applies the `omp.declare_target` attribute to the LLVM IR generated for
| /// `op`.
| ///
| /// Two kinds of operations are handled; anything else is accepted untouched:
| ///  - functions: when compiling for the target device, the LLVM function of a
| ///    host-only declare target function is erased;
| ///  - LLVM globals: the global is registered with the OpenMPIRBuilder as a
| ///    declare target variable and, on the device, its declare target
| ///    reference is requested when the capture clause is not `to` or unified
| ///    shared memory is required.
| ///
| /// Always returns success.
| static LogicalResult
| convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
|                          LLVM::ModuleTranslation &moduleTranslation) {
|   // Amend omp.declare_target by deleting the IR of the outlined functions
|   // created for target regions. They cannot be filtered out from MLIR earlier
|   // because the omp.target operation inside must be translated to LLVM, but
|   // the wrapper functions themselves must not remain at the end of the
|   // process. We know that functions where omp.declare_target does not match
|   // omp.is_target_device at this stage can only be wrapper functions because
|   // those that aren't are removed earlier as an MLIR transformation pass.
|   if (auto funcOp = dyn_cast<FunctionOpInterface>(op)) {
|     auto offloadMod = dyn_cast<omp::OffloadModuleInterface>(
|         op->getParentOfType<ModuleOp>().getOperation());
|     // Nothing is erased outside of an offload module compiled for the device.
|     if (!offloadMod || !offloadMod.getIsTargetDevice())
|       return success();
|
|     // Only host-only functions are dropped from the device module.
|     if (attribute.getDeviceType().getValue() !=
|         omp::DeclareTargetDeviceType::host)
|       return success();
|
|     llvm::Function *hostOnlyFn =
|         moduleTranslation.lookupFunction(funcOp.getName());
|     hostOnlyFn->dropAllReferences();
|     hostOnlyFn->eraseFromParent();
|     return success();
|   }
|
|   auto globalOp = dyn_cast<LLVM::GlobalOp>(op);
|   if (!globalOp)
|     return success();
|
|   // The global must already exist in the LLVM module to be registered.
|   llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
|   auto *globalVal = llvmModule->getNamedValue(globalOp.getSymName());
|   if (!globalVal)
|     return success();
|
|   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|   const bool isDeclaration = globalOp.isDeclaration();
|   const bool isExternallyVisible =
|       globalOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
|   auto sourceLoc = op->getLoc()->findInstanceOf<FileLineColLoc>();
|   llvm::StringRef mangledName = globalOp.getSymName();
|   auto captureClause =
|       convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
|   auto deviceClause =
|       convertToDeviceClauseKind(attribute.getDeviceType().getValue());
|
|   // Not consumed on the MLIR side; Clang uses it for bookkeeping.
|   std::vector<llvm::GlobalVariable *> generatedRefs;
|
|   // At most one triple, taken from the module's target triple attribute.
|   std::vector<llvm::Triple> targetTriple;
|   if (auto tripleAttr = dyn_cast_or_null<mlir::StringAttr>(
|           op->getParentOfType<mlir::ModuleOp>()->getAttr(
|               LLVM::LLVMDialect::getTargetTripleAttrName())))
|     targetTriple.emplace_back(tripleAttr.data());
|
|   // Yields (file name, line) of the global, or ("", 0) when no file location
|   // is attached to the operation.
|   auto fileInfoCallBack = [&sourceLoc]() {
|     if (!sourceLoc)
|       return std::pair<std::string, std::uint64_t>(std::string(),
|                                                    std::uint64_t{0});
|     return std::pair<std::string, std::uint64_t>(
|         sourceLoc.getFilename().str(), sourceLoc.getLine());
|   };
|
|   ompBuilder->registerTargetGlobalVariable(
|       captureClause, deviceClause, isDeclaration, isExternallyVisible,
|       ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
|       generatedRefs, /*OpenMPSimd*/ false, targetTriple,
|       /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
|       globalVal->getType(), globalVal);
|
|   // On the device, request the declare target variable's address for
|   // non-`to` captures, or whenever unified shared memory is required.
|   if (ompBuilder->Config.isTargetDevice() &&
|       (attribute.getCaptureClause().getValue() !=
|            mlir::omp::DeclareTargetCaptureClause::to ||
|        ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
|     ompBuilder->getAddrOfDeclareTargetVar(
|         captureClause, deviceClause, isDeclaration, isExternallyVisible,
|         ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
|         generatedRefs, /*OpenMPSimd*/ false, targetTriple,
|         globalVal->getType(),
|         /*GlobalInitializer*/ nullptr,
|         /*VariableLinkage*/ nullptr);
|   }
|
|   return success();
| }
| |
| // Tells whether `op` belongs to device code: either it is nested in an
| // omp.target operation, or its enclosing LLVM function is a declare target
| // function whose device type is anything but `host`.
| static bool isTargetDeviceOp(Operation *op) {
|   // Assumes no reverse offloading
|   if (op->getParentOfType<omp::TargetOp>())
|     return true;
|
|   auto enclosingFn = op->getParentOfType<LLVM::LLVMFuncOp>();
|   if (!enclosingFn)
|     return false;
|
|   auto declTarget = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
|       enclosingFn.getOperation());
|   if (!declTarget)
|     return false;
|
|   return declTarget.isDeclareTarget() &&
|          declTarget.getDeclareTargetDeviceType() !=
|              mlir::omp::DeclareTargetDeviceType::host;
| }
| |
| /// Lowers a single OpenMP dialect operation to LLVM IR (including OpenMP
| /// runtime calls) by dispatching on its concrete type to the matching
| /// conversion routine. Operations without a handler produce an
| /// "unsupported OpenMP operation" error.
| static LogicalResult
| convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
|                              LLVM::ModuleTranslation &moduleTranslation) {
|
|   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
|       // --- Stand-alone directives emitted directly through the builder. ---
|       .Case([&](omp::BarrierOp) {
|         ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
|         return success();
|       })
|       .Case([&](omp::TaskwaitOp) {
|         ompBuilder->createTaskwait(builder.saveIP());
|         return success();
|       })
|       .Case([&](omp::TaskyieldOp) {
|         ompBuilder->createTaskyield(builder.saveIP());
|         return success();
|       })
|       .Case([&](omp::FlushOp) {
|         // The runtime entry point (__kmpc_flush) takes no argument list.
|         // The OpenMP standard permits this:
|         // "An implementation may implement a flush with a list by ignoring
|         // the list, and treating it the same as a flush without a list."
|         // Hence the list is dropped and every flush is emitted as a flush
|         // without a list.
|         ompBuilder->createFlush(builder.saveIP());
|         return success();
|       })
|       // --- Parallelism, worksharing and synchronization constructs. ---
|       .Case([&](omp::ParallelOp parallelOp) {
|         return convertOmpParallel(parallelOp, builder, moduleTranslation);
|       })
|       .Case([&](omp::TeamsOp teamsOp) {
|         return convertOmpTeams(teamsOp, builder, moduleTranslation);
|       })
|       .Case([&](omp::MasterOp) {
|         return convertOmpMaster(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::CriticalOp) {
|         return convertOmpCritical(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::OrderedRegionOp) {
|         return convertOmpOrderedRegion(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::OrderedOp) {
|         return convertOmpOrdered(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::WsloopOp) {
|         return convertOmpWsloop(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::SimdOp) {
|         return convertOmpSimd(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::SectionsOp) {
|         return convertOmpSections(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::SingleOp singleOp) {
|         return convertOmpSingle(singleOp, builder, moduleTranslation);
|       })
|       // --- Tasking. ---
|       .Case([&](omp::TaskOp taskOp) {
|         return convertOmpTaskOp(taskOp, builder, moduleTranslation);
|       })
|       .Case([&](omp::TaskgroupOp taskgroupOp) {
|         return convertOmpTaskgroupOp(taskgroupOp, builder, moduleTranslation);
|       })
|       // --- Atomics. ---
|       .Case([&](omp::AtomicReadOp) {
|         return convertOmpAtomicRead(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::AtomicWriteOp) {
|         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
|       })
|       .Case([&](omp::AtomicUpdateOp updateOp) {
|         return convertOmpAtomicUpdate(updateOp, builder, moduleTranslation);
|       })
|       .Case([&](omp::AtomicCaptureOp captureOp) {
|         return convertOmpAtomicCapture(captureOp, builder, moduleTranslation);
|       })
|       // --- Data environment and offloading. ---
|       .Case([&](omp::ThreadprivateOp) {
|         return convertOmpThreadprivate(*op, builder, moduleTranslation);
|       })
|       .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
|             omp::TargetUpdateOp>([&](auto dataOp) {
|         return convertOmpTargetData(dataOp, builder, moduleTranslation);
|       })
|       .Case([&](omp::TargetOp) {
|         return convertOmpTarget(*op, builder, moduleTranslation);
|       })
|       // --- Operations that need no lowering of their own. ---
|       .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
|             omp::CriticalDeclareOp, omp::MapInfoOp, omp::MapBoundsOp,
|             omp::PrivateClauseOp>([](auto) {
|         // `yield` and `terminator`: the block structure was already created
|         // while handling the region of their parent operation.
|         // `declare_reduction`: consumed by reductions, never converted
|         // directly.
|         // `critical.declare`: only names critical sections used by
|         // `critical` ops; the OpenMP IRBuilder creates unique names for
|         // critical sections itself.
|         // `map.info`, `map.bounds`, `private`: handled by the owning
|         // operations (e.g. TargetOp, TargetEnterDataOp, TargetExitDataOp,
|         // TargetDataOp) and then discarded.
|         return success();
|       })
|       .Default([&](Operation *inst) {
|         return inst->emitError("unsupported OpenMP operation: ")
|                << inst->getName();
|       });
| }
| |
| /// Converts an operation that is part of device code. It simply defers to
| /// convertHostOrTargetOperation and hands back that routine's status.
| static LogicalResult
| convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
|                       LLVM::ModuleTranslation &moduleTranslation) {
|   LogicalResult conversionStatus =
|       convertHostOrTargetOperation(op, builder, moduleTranslation);
|   return conversionStatus;
| }
| |
| /// Converts only the `omp.target` / `omp.target_data` operations found in
| /// `op` or nested anywhere below it. If `op` itself is one of those, it is
| /// converted directly. Otherwise the nest is walked in pre-order; each match
| /// is converted and its own body is not descended into by the walk. Fails as
| /// soon as one conversion fails.
| static LogicalResult
| convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
|                        LLVM::ModuleTranslation &moduleTranslation) {
|   // Converts `candidate` when it is a target or target data op and yields the
|   // conversion status; yields nullopt for every other operation.
|   auto convertIfTargetLike =
|       [&](Operation *candidate) -> std::optional<LogicalResult> {
|     if (isa<omp::TargetOp>(candidate))
|       return convertOmpTarget(*candidate, builder, moduleTranslation);
|     if (isa<omp::TargetDataOp>(candidate))
|       return convertOmpTargetData(candidate, builder, moduleTranslation);
|     return std::nullopt;
|   };
|
|   if (std::optional<LogicalResult> direct = convertIfTargetLike(op))
|     return *direct;
|
|   WalkResult walkOutcome =
|       op->walk<WalkOrder::PreOrder>([&](Operation *nested) {
|         std::optional<LogicalResult> status = convertIfTargetLike(nested);
|         if (!status)
|           return WalkResult::advance();
|         // A converted op has handled its own region; a failure aborts the
|         // whole walk.
|         return failed(*status) ? WalkResult::interrupt() : WalkResult::skip();
|       });
|   return failure(walkOutcome.wasInterrupted());
| }
| |
| namespace {
|
| /// Dialect interface through which the LLVM IR translation handles the
| /// OpenMP dialect: it lowers OpenMP operations and applies OpenMP attributes
| /// attached to other operations.
| class OpenMPDialectLLVMIRTranslationInterface
|     : public LLVMTranslationDialectInterface {
| public:
|   using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
|
|   /// Processes an OpenMP dialect attribute found on `op`, producing the
|   /// corresponding LLVM IR, runtime calls, or amendments to the operation.
|   LogicalResult
|   amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
|                  NamedAttribute attribute,
|                  LLVM::ModuleTranslation &moduleTranslation) const final;
|
|   /// Lowers `op` to LLVM IR with the given IR builder, recording translation
|   /// state in `moduleTranslation`.
|   LogicalResult
|   convertOperation(Operation *op, llvm::IRBuilderBase &builder,
|                    LLVM::ModuleTranslation &moduleTranslation) const final;
| };
|
| } // namespace
| |
| /// Applies one OpenMP dialect attribute attached to `op`.
| ///
| /// The attribute name selects a handler:
| ///  - "omp.is_target_device", "omp.is_gpu", "omp.requires": update the
| ///    OpenMPIRBuilder configuration;
| ///  - "omp.host_ir_filepath": loads offload info metadata from the given path;
| ///  - "omp.flags": forwarded to convertFlagsAttr;
| ///  - "omp.version": adds the "openmp" module flag;
| ///  - "omp.declare_target": forwarded to convertDeclareTargetAttr.
| /// Each handler fails when the attribute value does not have the type it
| /// expects. Attribute names without a handler are accepted as-is.
| ///
| /// \param op                operation carrying the attribute.
| /// \param instructions      not used by this implementation.
| /// \param attribute         the named attribute to process.
| /// \param moduleTranslation translation state giving access to the
| ///                          OpenMPIRBuilder.
| LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
|     Operation *op, ArrayRef<llvm::Instruction *> instructions,
|     NamedAttribute attribute,
|     LLVM::ModuleTranslation &moduleTranslation) const {
|   // The handlers are temporaries referenced through function_ref; the selected
|   // one is invoked within this same full expression, while they are alive.
|   // This return is unconditional, so no statement may follow it.
|   return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
|              attribute.getName())
|       .Case("omp.is_target_device",
|             [&](Attribute attr) {
|               if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
|                 llvm::OpenMPIRBuilderConfig &config =
|                     moduleTranslation.getOpenMPBuilder()->Config;
|                 config.setIsTargetDevice(deviceAttr.getValue());
|                 return success();
|               }
|               return failure();
|             })
|       .Case("omp.is_gpu",
|             [&](Attribute attr) {
|               if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
|                 llvm::OpenMPIRBuilderConfig &config =
|                     moduleTranslation.getOpenMPBuilder()->Config;
|                 config.setIsGPU(gpuAttr.getValue());
|                 return success();
|               }
|               return failure();
|             })
|       .Case("omp.host_ir_filepath",
|             [&](Attribute attr) {
|               if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
|                 llvm::OpenMPIRBuilder *ompBuilder =
|                     moduleTranslation.getOpenMPBuilder();
|                 ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
|                 return success();
|               }
|               return failure();
|             })
|       .Case("omp.flags",
|             [&](Attribute attr) {
|               if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
|                 return convertFlagsAttr(op, rtlAttr, moduleTranslation);
|               return failure();
|             })
|       .Case("omp.version",
|             [&](Attribute attr) {
|               if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
|                 llvm::OpenMPIRBuilder *ompBuilder =
|                     moduleTranslation.getOpenMPBuilder();
|                 ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
|                                             versionAttr.getVersion());
|                 return success();
|               }
|               return failure();
|             })
|       .Case("omp.declare_target",
|             [&](Attribute attr) {
|               if (auto declareTargetAttr =
|                       dyn_cast<omp::DeclareTargetAttr>(attr))
|                 return convertDeclareTargetAttr(op, declareTargetAttr,
|                                                 moduleTranslation);
|               return failure();
|             })
|       .Case("omp.requires",
|             [&](Attribute attr) {
|               if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
|                 using Requires = omp::ClauseRequires;
|                 Requires flags = requiresAttr.getValue();
|                 llvm::OpenMPIRBuilderConfig &config =
|                     moduleTranslation.getOpenMPBuilder()->Config;
|                 config.setHasRequiresReverseOffload(
|                     bitEnumContainsAll(flags, Requires::reverse_offload));
|                 config.setHasRequiresUnifiedAddress(
|                     bitEnumContainsAll(flags, Requires::unified_address));
|                 config.setHasRequiresUnifiedSharedMemory(
|                     bitEnumContainsAll(flags, Requires::unified_shared_memory));
|                 config.setHasRequiresDynamicAllocators(
|                     bitEnumContainsAll(flags, Requires::dynamic_allocators));
|                 return success();
|               }
|               return failure();
|             })
|       .Default([](Attribute) {
|         // Fall through for omp attributes that do not require lowering.
|         return success();
|       })(attribute.getValue());
| }
| |
| /// Entry point for lowering one OpenMP dialect operation to LLVM IR
| /// (including OpenMP runtime calls).
| ///
| /// Outside of device compilation every operation takes the common lowering
| /// path. When compiling for the target device, operations that belong to
| /// device code are lowered as such, and for all others only the target
| /// operations nested inside them are converted.
| LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
|     Operation *op, llvm::IRBuilderBase &builder,
|     LLVM::ModuleTranslation &moduleTranslation) const {
|
|   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
|
|   // Not a device compilation: use the common lowering path.
|   if (!ompBuilder->Config.isTargetDevice())
|     return convertHostOrTargetOperation(op, builder, moduleTranslation);
|
|   // Device compilation: device code is lowered directly...
|   if (isTargetDeviceOp(op))
|     return convertTargetDeviceOp(op, builder, moduleTranslation);
|
|   // ...whereas other code only contributes its nested target operations.
|   return convertTargetOpsInNest(op, builder, moduleTranslation);
| }
| |
| /// Registers the OpenMP dialect in `registry` together with an extension that
| /// attaches the LLVM IR translation interface to the dialect.
| void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
|   registry.insert<omp::OpenMPDialect>();
|   // The unary plus turns the capture-less lambda into a plain function
|   // pointer.
|   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
|     dialect->addInterfaces<OpenMPDialectLLVMIRTranslationInterface>();
|   });
| }
| |
| /// Convenience overload: builds a fresh registry holding the OpenMP dialect
| /// translation and appends it to `context`.
| void mlir::registerOpenMPDialectTranslation(MLIRContext &context) {
|   DialectRegistry translationRegistry;
|   registerOpenMPDialectTranslation(translationRegistry);
|   context.appendDialectRegistry(translationRegistry);
| }