blob: afa823c5b600b08841993bb4fcf78b26b632d031 [file] [log] [blame]
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace clang;
using namespace CodeGen;
namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
/// \brief Kinds of OpenMP regions used in codegen.
enum CGOpenMPRegionKind {
/// \brief Region with outlined function for standalone 'parallel'
/// directive.
ParallelOutlinedRegion,
/// \brief Region with outlined function for standalone 'task' directive.
TaskOutlinedRegion,
/// \brief Region for constructs that do not require function outlining,
/// like 'for', 'sections', 'atomic' etc. directives.
InlinedRegion,
/// \brief Region with outlined function for standalone 'target' directive.
TargetRegion,
};
CGOpenMPRegionInfo(const CapturedStmt &CS,
const CGOpenMPRegionKind RegionKind,
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
bool HasCancel)
: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
bool HasCancel)
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
Kind(Kind), HasCancel(HasCancel) {}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
virtual const VarDecl *getThreadIDVariable() const = 0;
/// \brief Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
/// \brief Get an LValue for the current ThreadID variable.
/// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
bool hasCancel() const { return HasCancel; }
static bool classof(const CGCapturedStmtInfo *Info) {
return Info->getKind() == CR_OpenMP;
}
~CGOpenMPRegionInfo() override = default;
protected:
CGOpenMPRegionKind RegionKind;
RegionCodeGenTy CodeGen;
OpenMPDirectiveKind Kind;
bool HasCancel;
};
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
HasCancel),
ThreadIDVar(ThreadIDVar) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
ParallelOutlinedRegion;
}
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
};
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
class UntiedTaskActionTy final : public PrePostActionTy {
bool Untied;
const VarDecl *PartIDVar;
const RegionCodeGenTy UntiedCodeGen;
llvm::SwitchInst *UntiedSwitch = nullptr;
public:
UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
const RegionCodeGenTy &UntiedCodeGen)
: Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
void Enter(CodeGenFunction &CGF) override {
if (Untied) {
// Emit task switching point.
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
auto *DoneBB = CGF.createBasicBlock(".untied.done.");
UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
CGF.EmitBlock(DoneBB);
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(0),
CGF.Builder.GetInsertBlock());
emitUntiedSwitch(CGF);
}
}
void emitUntiedSwitch(CodeGenFunction &CGF) const {
if (Untied) {
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
PartIdLVal);
UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next.");
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock());
CGF.EmitBranchThroughCleanup(CurPoint);
CGF.EmitBlock(CurPoint.getBlock());
}
}
unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
};
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel,
const UntiedTaskActionTy &Action)
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
ThreadIDVar(ThreadIDVar), Action(Action) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
/// \brief Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
void emitUntiedSwitch(CodeGenFunction &CGF) override {
Action.emitUntiedSwitch(CGF);
}
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
TaskOutlinedRegion;
}
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// Action for emitting code for untied tasks.
const UntiedTaskActionTy &Action;
};
/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
OldCSI(OldCSI),
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
// \brief Retrieve the value of the context parameter.
llvm::Value *getContextValue() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getContextValue();
llvm_unreachable("No context value for inlined OpenMP region");
}
void setContextValue(llvm::Value *V) override {
if (OuterRegionInfo) {
OuterRegionInfo->setContextValue(V);
return;
}
llvm_unreachable("No context value for inlined OpenMP region");
}
/// \brief Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
return OuterRegionInfo->lookup(VD);
// If there is no outer outlined region,no need to lookup in a list of
// captured variables, we can use the original one.
return nullptr;
}
FieldDecl *getThisFieldDecl() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getThisFieldDecl();
return nullptr;
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getThreadIDVariable();
return nullptr;
}
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override {
if (auto *OuterRegionInfo = getOldCSI())
return OuterRegionInfo->getHelperName();
llvm_unreachable("No helper name for inlined OpenMP construct");
}
void emitUntiedSwitch(CodeGenFunction &CGF) override {
if (OuterRegionInfo)
OuterRegionInfo->emitUntiedSwitch(CGF);
}
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
}
~CGOpenMPInlinedRegionInfo() override = default;
private:
/// \brief CodeGen info about outer OpenMP region.
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
CGOpenMPRegionInfo *OuterRegionInfo;
};
/// \brief API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
const RegionCodeGenTy &CodeGen, StringRef HelperName)
: CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
/*HasCancel=*/false),
HelperName(HelperName) {}
/// \brief This is unused for target regions because each starts executing
/// with a single thread.
const VarDecl *getThreadIDVariable() const override { return nullptr; }
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return HelperName; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
}
private:
StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
OMPD_unknown,
/*HasCancel=*/false),
PrivScope(CGF) {
// Make sure the globals captured in the provided statement are local by
// using the privatization logic. We assume the same variable is not
// captured more than once.
for (auto &C : CS.captures()) {
if (!C.capturesVariable() && !C.capturesVariableByCopy())
continue;
const VarDecl *VD = C.getCapturedVar();
if (VD->isLocalVarDeclOrParm())
continue;
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
/*RefersToEnclosingVariableOrCapture=*/false,
VD->getType().getNonReferenceType(), VK_LValue,
SourceLocation());
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
return CGF.EmitLValue(&DRE).getAddress();
});
}
(void)PrivScope.Privatize();
}
/// \brief Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
return FD;
return nullptr;
}
/// \brief Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
llvm_unreachable("No body for expressions");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
llvm_unreachable("No thread id for expressions");
}
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override {
llvm_unreachable("No helper name for expressions");
}
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
private:
/// Private scope to capture global variables.
CodeGenFunction::OMPPrivateScope PrivScope;
};
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
public:
/// \brief Constructs region for combined constructs.
/// \param CodeGen Code generation sequence for combined directives. Includes
/// a list of functions used for code generation of implicitly inlined
/// regions.
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGF(CGF) {
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
CGF.LambdaThisCaptureField = nullptr;
}
~InlinedOpenMPRegionRAII() {
// Restore original CapturedStmtInfo only if we're done with code emission.
auto *OldCSI =
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
delete CGF.CapturedStmtInfo;
CGF.CapturedStmtInfo = OldCSI;
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
}
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
/// \brief Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
/// \brief Use c-style ident structure.
OMP_IDENT_KMPC = 0x02,
/// \brief Atomic reduction option for kmpc_reduce.
OMP_ATOMIC_REDUCE = 0x10,
/// \brief Explicit 'barrier' directive.
OMP_IDENT_BARRIER_EXPL = 0x20,
/// \brief Implicit barrier in code.
OMP_IDENT_BARRIER_IMPL = 0x40,
/// \brief Implicit barrier in 'for' directive.
OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
/// \brief Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
/// \brief Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
/// kmp_int32 reserved_1; /**< might be used in Fortran;
/// see above */
/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
/// KMP_IDENT_KMPC identifies this union
/// member */
/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
/// see above */
///#if USE_ITT_BUILD
/// /* but currently used for storing
/// region-specific ITT */
/// /* contextual information. */
///#endif /* USE_ITT_BUILD */
/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
/// C++ */
/// char const *psource; /**< String describing the source location.
/// The string is composed of semi-colon separated
// fields which describe the source file,
/// the function and a pair of line numbers that
/// delimit the construct.
/// */
/// } ident_t;
enum IdentFieldIndex {
/// \brief might be used in Fortran
IdentField_Reserved_1,
/// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
IdentField_Flags,
/// \brief Not really used in Fortran any more
IdentField_Reserved_2,
/// \brief Source[4] in Fortran, do not use for C++
IdentField_Reserved_3,
/// \brief String describing the source location. The string is composed of
/// semi-colon separated fields which describe the source file, the function
/// and a pair of line numbers that delimit the construct.
IdentField_PSource
};
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
/// \brief Lower bound for default (unordered) versions.
OMP_sch_lower = 32,
OMP_sch_static_chunked = 33,
OMP_sch_static = 34,
OMP_sch_dynamic_chunked = 35,
OMP_sch_guided_chunked = 36,
OMP_sch_runtime = 37,
OMP_sch_auto = 38,
/// static with chunk adjustment (e.g., simd)
OMP_sch_static_balanced_chunked = 45,
/// \brief Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
OMP_ord_static_chunked = 65,
OMP_ord_static = 66,
OMP_ord_dynamic_chunked = 67,
OMP_ord_guided_chunked = 68,
OMP_ord_runtime = 69,
OMP_ord_auto = 70,
OMP_sch_default = OMP_sch_static,
/// \brief dist_schedule types
OMP_dist_sch_static_chunked = 91,
OMP_dist_sch_static = 92,
/// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
/// Set if the monotonic schedule modifier was present.
OMP_sch_modifier_monotonic = (1 << 29),
/// Set if the nonmonotonic schedule modifier was present.
OMP_sch_modifier_nonmonotonic = (1 << 30),
};
enum OpenMPRTLFunction {
/// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
/// kmpc_micro microtask, ...);
OMPRTL__kmpc_fork_call,
/// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
/// kmp_int32 global_tid, void *data, size_t size, void ***cache);
OMPRTL__kmpc_threadprivate_cached,
/// \brief Call to void __kmpc_threadprivate_register( ident_t *,
/// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
OMPRTL__kmpc_threadprivate_register,
// Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
OMPRTL__kmpc_global_thread_num,
// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
OMPRTL__kmpc_critical,
// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
// global_tid, kmp_critical_name *crit, uintptr_t hint);
OMPRTL__kmpc_critical_with_hint,
// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
OMPRTL__kmpc_end_critical,
// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_cancel_barrier,
// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_barrier,
// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_for_static_fini,
// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_serialized_parallel,
// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_end_serialized_parallel,
// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads);
OMPRTL__kmpc_push_num_threads,
// Call to void __kmpc_flush(ident_t *loc);
OMPRTL__kmpc_flush,
// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_master,
// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_master,
// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
// int end_part);
OMPRTL__kmpc_omp_taskyield,
// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_single,
// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_single,
// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
OMPRTL__kmpc_omp_task_alloc,
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
// new_task);
OMPRTL__kmpc_omp_task,
// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
// kmp_int32 didit);
OMPRTL__kmpc_copyprivate,
// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
OMPRTL__kmpc_reduce,
// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
// *lck);
OMPRTL__kmpc_reduce_nowait,
// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce,
// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce_nowait,
// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t * new_task);
OMPRTL__kmpc_omp_task_begin_if0,
// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t * new_task);
OMPRTL__kmpc_omp_task_complete_if0,
// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_ordered,
// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_end_ordered,
// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_omp_taskwait,
// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_taskgroup,
// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_end_taskgroup,
// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
// int proc_bind);
OMPRTL__kmpc_push_proc_bind,
// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
OMPRTL__kmpc_omp_task_with_deps,
// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
OMPRTL__kmpc_omp_wait_deps,
// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
// global_tid, kmp_int32 cncl_kind);
OMPRTL__kmpc_cancellationpoint,
// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind);
OMPRTL__kmpc_cancel,
// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_teams, kmp_int32 thread_limit);
OMPRTL__kmpc_push_num_teams,
// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
OMPRTL__kmpc_fork_teams,
// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
// sched, kmp_uint64 grainsize, void *task_dup);
OMPRTL__kmpc_taskloop,
// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
// num_dims, struct kmp_dim *dims);
OMPRTL__kmpc_doacross_init,
// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
OMPRTL__kmpc_doacross_fini,
// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_post,
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_wait,
//
// Offloading related calls
//
// Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
OMPRTL__tgt_target,
// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
OMPRTL__tgt_target_teams,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_unregister_lib,
// Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_begin,
// Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_end,
// Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_update,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
PrePostActionTy *Action;
public:
explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
Action->Exit(CGF);
}
};
} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
CodeGenFunction::RunCleanupsScope Scope(CGF);
if (PrePostAction) {
CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
Callback(CodeGen, CGF, *PrePostAction);
} else {
PrePostActionTy Action;
Callback(CodeGen, CGF, Action);
}
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType()->castAs<PointerType>());
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
if (!CGF.HaveInsertPoint())
return;
// 1.2.2 OpenMP Language Terminology
// Structured block - An executable statement with a single entry at the
// top and a single exit at the bottom.
// The point of exit cannot be a branch out of the structured block.
// longjmp() and throw() must not violate the entry/exit criteria.
CGF.EHStack.pushTerminate();
CodeGen(CGF);
CGF.EHStack.popTerminate();
}
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType(),
AlignmentSource::Decl);
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
: CGM(CGM), OffloadEntriesInfoManager(CGM) {
IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
CGM.Int8PtrTy /* psource */, nullptr);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
}
void CGOpenMPRuntime::clear() {
InternalVars.clear();
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
const Expr *CombinerInitializer, const VarDecl *In,
const VarDecl *Out, bool IsCombiner) {
// void .omp_combiner.(Ty *in, Ty *out);
auto &C = CGM.getContext();
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
/*Id=*/nullptr, PtrTy);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
/*Id=*/nullptr, PtrTy);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
auto *Fn = llvm::Function::Create(
FnTy, llvm::GlobalValue::InternalLinkage,
IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
CodeGenFunction::OMPPrivateScope Scope(CGF);
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
.getAddress();
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
.getAddress();
});
(void)Scope.Privatize();
CGF.EmitIgnoredExpr(CombinerInitializer);
Scope.ForceCleanup();
CGF.FinishFunction();
return Fn;
}
void CGOpenMPRuntime::emitUserDefinedReduction(
CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
if (UDRMap.count(D) > 0)
return;
auto &C = CGM.getContext();
if (!In || !Out) {
In = &C.Idents.get("omp_in");
Out = &C.Idents.get("omp_out");
}
llvm::Function *Combiner = emitCombinerOrInitializer(
CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
cast<VarDecl>(D->lookup(Out).front()),
/*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
if (auto *Init = D->getInitializer()) {
if (!Priv || !Orig) {
Priv = &C.Idents.get("omp_priv");
Orig = &C.Idents.get("omp_orig");
}
Initializer = emitCombinerOrInitializer(
CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
cast<VarDecl>(D->lookup(Priv).front()),
/*IsCombiner=*/false);
}
UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
if (CGF) {
auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
Decls.second.push_back(D);
}
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
auto I = UDRMap.find(D);
if (I != UDRMap.end())
return I->second;
emitUserDefinedReduction(/*CGF=*/nullptr, D);
return UDRMap.lookup(D);
}
// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
// All the fields except the last are i32, so this works beautifully.
return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
IdentFieldIndex Field,
const llvm::Twine &Name = "") {
auto Offset = getOffsetOfIdentField(Field);
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
HasCancel = OPD->hasCancel();
else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
HasCancel = OPSD->hasCancel();
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
bool Tied, unsigned &NumberOfParts) {
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
auto *ThreadID = getThreadID(CGF, D.getLocStart());
auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
llvm::Value *TaskArgs[] = {
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
TaskTVar->getType()->castAs<PointerType>())
.getPointer()};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
};
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
UntiedCodeGen);
CodeGen.setAction(Action);
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
auto *TD = dyn_cast<OMPTaskDirective>(&D);
CodeGenFunction CGF(CGM, true);
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
InnermostKind,
TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
NumberOfParts = Action.getNumberOfParts();
return Res;
}
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = getIdentAlign(CGM);
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
if (!Entry) {
if (!DefaultOpenMPPSource) {
// Initialize default location for psource field of ident_t structure of
// all ident_t objects. Format is ";file;function;line;column;;".
// Taken from
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
DefaultOpenMPPSource =
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
DefaultOpenMPPSource =
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
auto DefaultOpenMPLocation = new llvm::GlobalVariable(
CGM.getModule(), IdentTy, /*isConstant*/ true,
llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
DefaultOpenMPLocation->setAlignment(Align.getQuantity());
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
llvm::Constant *Values[] = {Zero,
llvm::ConstantInt::get(CGM.Int32Ty, Flags),
Zero, Zero, DefaultOpenMPPSource};
llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
DefaultOpenMPLocation->setInitializer(Init);
OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
}
return Address(Entry, Align);
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
Flags |= OMP_IDENT_KMPC;
// If no debug info is generated - return global default location.
if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
Loc.isInvalid())
return getOrCreateDefaultLocation(Flags).getPointer();
assert(CGF.CurFn && "No function in current CodeGenFunction.");
Address LocValue = Address::invalid();
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
// GetOpenMPThreadID was called before this routine.
if (!LocValue.isValid()) {
// Generate "ident_t .kmpc_loc.addr;"
Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
".kmpc_loc.addr");
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGM.getSize(getIdentSize(CGF.CGM)));
}
// char **psource = &.kmpc_loc_<flags>.addr.psource;
Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
if (OMPDebugLoc == nullptr) {
SmallString<128> Buffer2;
llvm::raw_svector_ostream OS2(Buffer2);
// Build debug location
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
OS2 << ";" << PLoc.getFilename() << ";";
if (const FunctionDecl *FD =
dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
OS2 << FD->getQualifiedNameAsString();
}
OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
}
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
// Our callers always pass this to a runtime function, so for
// convenience, go ahead and return a naked pointer.
return LocValue.getPointer();
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
SourceLocation Loc) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *ThreadID = nullptr;
// Check whether we've already cached a load of the thread id in this
// function.
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
ThreadID = I->second.ThreadID;
if (ThreadID != nullptr)
return ThreadID;
}
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (OMPRegionInfo->getThreadIDVariable()) {
// Check if this an outlined function with thread id passed as argument.
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
return ThreadID;
}
}
// This is not an outlined function region - need to call __kmpc_int32
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
ThreadID =
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
return ThreadID;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
if (OpenMPLocThreadIDMap.count(CGF.CurFn))
OpenMPLocThreadIDMap.erase(CGF.CurFn);
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for(auto *D : FunctionUDRMap[CGF.CurFn]) {
UDRMap.erase(D);
}
FunctionUDRMap.erase(CGF.CurFn);
}
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
if (!IdentTy) {
}
return llvm::PointerType::getUnqual(IdentTy);
}
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
if (!Kmpc_MicroTy) {
// Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
llvm::PointerType::getUnqual(CGM.Int32Ty)};
Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
}
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Constant *RTLFn = nullptr;
switch (static_cast<OpenMPRTLFunction>(Function)) {
case OMPRTL__kmpc_fork_call: {
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
break;
}
case OMPRTL__kmpc_global_thread_num: {
// Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
break;
}
case OMPRTL__kmpc_threadprivate_cached: {
// Build void *__kmpc_threadprivate_cached(ident_t *loc,
// kmp_int32 global_tid, void *data, size_t size, void ***cache);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy, CGM.SizeTy,
CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
break;
}
case OMPRTL__kmpc_critical: {
// Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
break;
}
case OMPRTL__kmpc_critical_with_hint: {
// Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit, uintptr_t hint);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy),
CGM.IntPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
break;
}
case OMPRTL__kmpc_threadprivate_register: {
// Build void __kmpc_threadprivate_register(ident_t *, void *data,
// kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
// typedef void *(*kmpc_ctor)(void *);
auto KmpcCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg*/ false)->getPointerTo();
// typedef void *(*kmpc_cctor)(void *, void *);
llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto KmpcCopyCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
/*isVarArg*/ false)->getPointerTo();
// typedef void (*kmpc_dtor)(void *);
auto KmpcDtorTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
->getPointerTo();
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
KmpcCopyCtorTy, KmpcDtorTy};
auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
/*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
break;
}
case OMPRTL__kmpc_end_critical: {
// Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
break;
}
case OMPRTL__kmpc_cancel_barrier: {
// Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
break;
}
case OMPRTL__kmpc_barrier: {
// Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
break;
}
case OMPRTL__kmpc_for_static_fini: {
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
break;
}
case OMPRTL__kmpc_push_num_threads: {
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
break;
}
case OMPRTL__kmpc_serialized_parallel: {
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
break;
}
case OMPRTL__kmpc_end_serialized_parallel: {
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
}
case OMPRTL__kmpc_flush: {
// Build void __kmpc_flush(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
break;
}
case OMPRTL__kmpc_master: {
// Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
break;
}
case OMPRTL__kmpc_end_master: {
// Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
break;
}
case OMPRTL__kmpc_omp_taskyield: {
// Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
// int end_part);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
break;
}
case OMPRTL__kmpc_single: {
// Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
break;
}
case OMPRTL__kmpc_end_single: {
// Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
break;
}
case OMPRTL__kmpc_omp_task_alloc: {
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
assert(KmpRoutineEntryPtrTy != nullptr &&
"Type kmp_routine_entry_t must be created.");
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
// Return void * and then cast to particular kmp_task_t type.
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
break;
}
case OMPRTL__kmpc_omp_task: {
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
break;
}
case OMPRTL__kmpc_copyprivate: {
// Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
// kmp_int32 didit);
llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *CpyFnTy =
llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
break;
}
case OMPRTL__kmpc_reduce: {
// Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
/*isVarArg=*/false);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
break;
}
case OMPRTL__kmpc_reduce_nowait: {
// Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
// *lck);
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
/*isVarArg=*/false);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
break;
}
case OMPRTL__kmpc_end_reduce: {
// Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
break;
}
case OMPRTL__kmpc_end_reduce_nowait: {
// Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
break;
}
case OMPRTL__kmpc_omp_task_begin_if0: {
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
break;
}
case OMPRTL__kmpc_omp_task_complete_if0: {
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy,
/*Name=*/"__kmpc_omp_task_complete_if0");
break;
}
case OMPRTL__kmpc_ordered: {
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
break;
}
case OMPRTL__kmpc_end_ordered: {
// Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
break;
}
case OMPRTL__kmpc_omp_taskwait: {
// Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
break;
}
case OMPRTL__kmpc_taskgroup: {
// Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
break;
}
case OMPRTL__kmpc_end_taskgroup: {
// Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
break;
}
case OMPRTL__kmpc_push_proc_bind: {
// Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
// int proc_bind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
break;
}
case OMPRTL__kmpc_omp_task_with_deps: {
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
break;
}
case OMPRTL__kmpc_omp_wait_deps: {
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
// kmp_depend_info_t *noalias_dep_list);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty, CGM.VoidPtrTy,
CGM.Int32Ty, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
break;
}
case OMPRTL__kmpc_cancellationpoint: {
// Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
// global_tid, kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
break;
}
case OMPRTL__kmpc_cancel: {
// Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
break;
}
case OMPRTL__kmpc_push_num_teams: {
// Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
// kmp_int32 num_teams, kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
break;
}
case OMPRTL__kmpc_fork_teams: {
// Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
break;
}
case OMPRTL__kmpc_taskloop: {
// Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
// sched, kmp_uint64 grainsize, void *task_dup);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
CGM.IntTy,
CGM.VoidPtrTy,
CGM.IntTy,
CGM.Int64Ty->getPointerTo(),
CGM.Int64Ty->getPointerTo(),
CGM.Int64Ty,
CGM.IntTy,
CGM.IntTy,
CGM.Int64Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
break;
}
case OMPRTL__kmpc_doacross_init: {
// Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
// num_dims, struct kmp_dim *dims);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
break;
}
case OMPRTL__kmpc_doacross_fini: {
// Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
break;
}
case OMPRTL__kmpc_doacross_post: {
// Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
break;
}
case OMPRTL__kmpc_doacross_wait: {
// Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
}
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
case OMPRTL__tgt_target_teams: {
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo(),
CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
break;
}
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
break;
}
case OMPRTL__tgt_unregister_lib: {
// Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
break;
}
case OMPRTL__tgt_target_data_begin: {
// Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
break;
}
case OMPRTL__tgt_target_data_end: {
// Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
break;
}
case OMPRTL__tgt_target_data_update: {
// Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
break;
}
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
}
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
: (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
ITy, // lower
ITy, // upper
ITy, // stride
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
: (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
: (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy // p_stride
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
assert(!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported());
// Lookup the entry, lazily creating it if necessary.
return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
Twine(CGM.getMangledName(VD)) + ".cache.");
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
const VarDecl *VD,
Address VDAddr,
SourceLocation Loc) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return VDAddr;
auto VarTy = VDAddr.getElementType();
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
CGM.Int8PtrTy),
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
getOrCreateThreadPrivateCache(VD)};
return Address(CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
VDAddr.getAlignment());
}
void CGOpenMPRuntime::emitThreadPrivateVarInit(
CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
// library.
auto OMPLoc = emitUpdateLocation(CGF, Loc);
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
OMPLoc);
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
// to register constructor/destructor for variable.
llvm::Value *Args[] = {OMPLoc,
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
CGM.VoidPtrTy),
Ctor, CopyCtor, Dtor};
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
const VarDecl *VD, Address VDAddr, SourceLocation Loc,
bool PerformInit, CodeGenFunction *CGF) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return nullptr;
VD = VD->getDefinition(CGM.getContext());
if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
ThreadPrivateWithDefinition.insert(VD);
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
auto Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
// Generate function that re-emits the declaration's initializer into the
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidPtrTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_ctor_.", FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
Args, SourceLocation());
auto ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
Address Arg = Address(ArgVal, VDAddr.getAlignment());
Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
CtorCGF.FinishFunction();
Ctor = Fn;
}
if (VD->getType().isDestructedType() != QualType::DK_none) {
// Generate function that emits destructor call for the threadprivate copy
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_dtor_.", FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
SourceLocation());
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
auto ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
Dtor = Fn;
}
// Do not emit init function if it is not required.
if (!Ctor && !Dtor)
return nullptr;
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto CopyCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
/*isVarArg=*/false)->getPointerTo();
// Copying constructor for the threadprivate variable.
// Must be NULL - reserved by runtime, but currently it requires that this
// parameter is always NULL. Otherwise it fires assertion.
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
if (Ctor == nullptr) {
auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg=*/false)->getPointerTo();
Ctor = llvm::Constant::getNullValue(CtorTy);
}
if (Dtor == nullptr) {
auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
/*isVarArg=*/false)->getPointerTo();
Dtor = llvm::Constant::getNullValue(DtorTy);
}
if (!CGF) {
auto InitFunctionTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
InitFunctionTy, ".__omp_threadprivate_init_.",
CGM.getTypes().arrangeNullaryFunction());
CodeGenFunction InitCGF(CGM);
FunctionArgList ArgList;
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
CGM.getTypes().arrangeNullaryFunction(), ArgList,
Loc);
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
InitCGF.FinishFunction();
return InitFunction;
}
emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
}
return nullptr;
}
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
/// ThenGen();
/// } else {
/// ElseGen();
/// }
static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
const RegionCodeGenTy &ThenGen,
const RegionCodeGenTy &ElseGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
bool CondConstant;
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
if (CondConstant)
ThenGen(CGF);
else
ElseGen(CGF);
return;
}
// Otherwise, the condition did not fold, or we couldn't elide it. Just
// emit the conditional branch.
auto ThenBlock = CGF.createBasicBlock("omp_if.then");
auto ElseBlock = CGF.createBasicBlock("omp_if.else");
auto ContBlock = CGF.createBasicBlock("omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
// Emit the 'then' code.
CGF.EmitBlock(ThenBlock);
ThenGen(CGF);
CGF.EmitBranch(ContBlock);
// Emit the 'else' code if present.
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(ElseBlock);
ElseGen(CGF);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBranch(ContBlock);
// Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) {
if (!CGF.HaveInsertPoint())
return;
auto *RTLoc = emitUpdateLocation(CGF, Loc);
auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
PrePostActionTy &) {
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
auto &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RTLoc,
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
llvm::SmallVector<llvm::Value *, 16> RealArgs;
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
auto ThreadID = RT.getThreadID(CGF, Loc);
// Build calls:
// __kmpc_serialized_parallel(&Loc, GTid);
llvm::Value *Args[] = {RTLoc, ThreadID};
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
// OutlinedFn(&GTid, &zero, CapturedStruct);
auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
Address ZeroAddr =
CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
/*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
EndArgs);
};
if (IfCond)
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
}
}
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
SourceLocation Loc) {
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable())
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
auto ThreadID = getThreadID(CGF, Loc);
auto Int32Ty =
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
CGF.EmitStoreOfScalar(ThreadID,
CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
return ThreadIDTemp;
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
const llvm::Twine &Name) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << Name;
auto RuntimeName = Out.str();
auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
if (Elem.second) {
assert(Elem.second->getType()->getPointerElementType() == Ty &&
"OMP internal variable has different type than requested");
return &*Elem.second;
}
return Elem.second = new llvm::GlobalVariable(
CGM.getModule(), Ty, /*IsConstant*/ false,
llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
Elem.first());
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
llvm::Twine Name(".gomp_critical_user_", CriticalName);
return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
llvm::Value *EnterCallee;
ArrayRef<llvm::Value *> EnterArgs;
llvm::Value *ExitCallee;
ArrayRef<llvm::Value *> ExitArgs;
bool Conditional;
llvm::BasicBlock *ContBlock = nullptr;
public:
CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
bool Conditional = false)
: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
ExitArgs(ExitArgs), Conditional(Conditional) {}
void Enter(CodeGenFunction &CGF) override {
llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
if (Conditional) {
llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
ContBlock = CGF.createBasicBlock("omp_if.end");
// Generate the branch (If-stmt)
CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
CGF.EmitBlock(ThenBlock);
}
}
void Done(CodeGenFunction &CGF) {
// Emit the rest of blocks/branches
CGF.EmitBranch(ContBlock);
CGF.EmitBlock(ContBlock, true);
}
void Exit(CodeGenFunction &CGF) override {
CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
}
};
} // anonymous namespace
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
StringRef CriticalName,
const RegionCodeGenTy &CriticalOpGen,
SourceLocation Loc, const Expr *Hint) {
// __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
// CriticalOpGen();
// __kmpc_end_critical(ident_t *, gtid, Lock);
// Prepare arguments and build a call to __kmpc_critical
if (!CGF.HaveInsertPoint())
return;
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
getCriticalRegionLock(CriticalName)};
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
std::end(Args));
if (Hint) {
EnterArgs.push_back(CGF.Builder.CreateIntCast(
CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
}
CommonActionTy Action(
createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
: OMPRTL__kmpc_critical),
EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
CriticalOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &MasterOpGen,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
// if(__kmpc_master(ident_t *, gtid)) {
// MasterOpGen();
// __kmpc_end_master(ident_t *, gtid);
// }
// Prepare arguments and build a call to __kmpc_master
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
/*Conditional=*/true);
MasterOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
// Build call __kmpc_omp_taskyield(loc, thread_id, 0);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &TaskgroupOpGen,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
// __kmpc_taskgroup(ident_t *, gtid);
// TaskgroupOpGen();
// __kmpc_end_taskgroup(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_taskgroup
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
Args);
TaskgroupOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
unsigned Index, const VarDecl *Var) {
// Pull out the pointer to the variable.
Address PtrAddr =
CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
Addr = CGF.Builder.CreateElementBitCast(
Addr, CGF.ConvertTypeForMem(Var->getType()));
return Addr;
}
static llvm::Value *emitCopyprivateCopyFunction(
CodeGenModule &CGM, llvm::Type *ArgsType,
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
auto &C = CGM.getContext();
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
C.VoidPtrTy);
ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
C.VoidPtrTy);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
".omp.copyprivate.copy_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
// Dest = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
ArgsType), CGF.getPointerAlign());
Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
ArgsType), CGF.getPointerAlign());
// *(Type0*)Dst[0] = *(Type0*)Src[0];
// *(Type1*)Dst[1] = *(Type1*)Src[1];
// ...
// *(Typen*)Dst[n] = *(Typen*)Src[n];
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
QualType Type = VD->getType();
CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
}
CGF.FinishFunction();
return Fn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &SingleOpGen,
SourceLocation Loc,
ArrayRef<const Expr *> CopyprivateVars,
ArrayRef<const Expr *> SrcExprs,
ArrayRef<const Expr *> DstExprs,
ArrayRef<const Expr *> AssignmentOps) {
if (!CGF.HaveInsertPoint())
return;
assert(CopyprivateVars.size() == SrcExprs.size() &&
CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size());
auto &C = CGM.getContext();
// int32 did_it = 0;
// if(__kmpc_single(ident_t *, gtid)) {
// SingleOpGen();
// __kmpc_end_single(ident_t *, gtid);
// did_it = 1;
// }
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
Address DidIt = Address::invalid();
if (!CopyprivateVars.empty()) {
// int32 did_it = 0;
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
}
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
/*Conditional=*/true);
SingleOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
if (DidIt.isValid()) {
// did_it = 1;
CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
}
Action.Done(CGF);
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
auto CopyprivateArrayTy =
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
Address Elem = CGF.Builder.CreateConstArrayGEP(
CopyprivateList, I, CGF.getPointerSize());
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
Elem);
}
// Build function that copies private values from single region to all other
// threads in the corresponding parallel region.
auto *CpyFn = emitCopyprivateCopyFunction(
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
Address CL =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
CGF.VoidPtrTy);
auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
getThreadID(CGF, Loc), // i32 <gtid>
BufSize, // size_t <buf_size>
CL.getPointer(), // void *<copyprivate list>
CpyFn, // void (*) (void *, void *) <copy_func>
DidItVal // i32 did_it
};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
}
}
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &OrderedOpGen,
SourceLocation Loc, bool IsThreads) {
if (!CGF.HaveInsertPoint())
return;
// __kmpc_ordered(ident_t *, gtid);
// OrderedOpGen();
// __kmpc_end_ordered(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_ordered
if (IsThreads) {
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
createRuntimeFunction(OMPRTL__kmpc_end_ordered),
Args);
OrderedOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
return;
}
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind Kind, bool EmitChecks,
bool ForceSimpleCall) {
if (!CGF.HaveInsertPoint())
return;
// Build call __kmpc_cancel_barrier(loc, thread_id);
// Build call __kmpc_barrier(loc, thread_id);
unsigned Flags;
if (Kind == OMPD_for)
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
else if (Kind == OMPD_sections)
Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
else if (Kind == OMPD_single)
Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
else if (Kind == OMPD_barrier)
Flags = OMP_IDENT_BARRIER_EXPL;
else
Flags = OMP_IDENT_BARRIER_IMPL;
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
auto *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
if (EmitChecks) {
// if (__kmpc_cancel_barrier()) {
// exit from construct;
// }
auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
auto *ContBB = CGF.createBasicBlock(".cancel.continue");
auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
CGF.EmitBlock(ExitBB);
// exit from construct;
auto CancelDestination =
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
CGF.EmitBranchThroughCleanup(CancelDestination);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
}
return;
}
}
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked, bool Ordered) {
switch (ScheduleKind) {
case OMPC_SCHEDULE_static:
return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
: (Ordered ? OMP_ord_static : OMP_sch_static);