blob: 9f156192e10363e753f3220cfc18a89b7f9cf178 [file] [log] [blame]
//===---------- private.h - Target independent OpenMP target RTL ----------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Private function declarations and helper macros for debugging output.
#include "device.h"
#include <Debug.h>
#include <SourceInfo.h>
#include <omptarget.h>
#include <cstdint>
// Data-mapping entry points. All three are declared with the same parameter
// list, so they can be used through the TargetDataFuncPtrTy pointer type
// declared later in this header. They are only declared here (extern); the
// definitions live elsewhere. FromMapper is false when the caller omits it.
// NOTE(review): the per-parameter meaning (ArgNum entries in the parallel
// ArgsBase/Args/ArgSizes/ArgTypes/ArgNames/ArgMappers arrays) is inferred from
// the names -- confirm against the definitions.
extern int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
// Same parameter list as targetDataBegin; note the base-pointer array is
// spelled ArgBases in this declaration only.
extern int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
// Same parameter list as targetDataBegin.
extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
bool FromMapper = false);
// Declared here, defined elsewhere.
// NOTE(review): presumably runs the target region identified by HostPtr on
// Device with the arguments bundled in KernelArgs -- confirm in the definition.
extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
// Declared here, defined elsewhere. Takes a device memory region
// (DeviceMemory, DeviceMemorySize), NumArgs target arguments with per-argument
// offsets, and explicit launch parameters.
// NOTE(review): presumably re-launches a previously recorded kernel -- confirm.
extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
void *DeviceMemory, int64_t DeviceMemorySize,
void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t NumArgs,
int32_t NumTeams, int32_t ThreadLimit,
uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
// Declared here, defined elsewhere.
// NOTE(review): presumably reacts to the success/failure of an offload
// operation issued at source location Loc -- confirm in the definition.
extern void handleTargetOutcome(bool Success, ident_t *Loc);
// Declared here, defined elsewhere. DeviceID is taken by non-const reference,
// so the callee is able to modify the caller's value.
// NOTE(review): the meaning of the bool result is not visible here -- confirm.
extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
// Explicit memory-management helpers; all are declared here and defined
// elsewhere. Each takes a device number and a Name string.
// NOTE(review): Name is presumably the user-facing API name used in
// diagnostics, and Kind presumably selects an allocation kind -- confirm.
extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name);
// Counterpart taking the pointer returned by an allocation plus the same Kind.
extern void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
const char *Name);
// NOTE(review): presumably locks (pins) Size bytes of host memory at HostPtr
// for DeviceNum and returns a pointer for it -- confirm.
extern void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
const char *Name);
// Counterpart of targetLockExplicit for the same HostPtr.
extern void targetUnlockExplicit(void *HostPtr, int DeviceNum,
const char *Name);
// This structure stores information of a mapped memory region.
struct MapComponentInfoTy {
  // The five fields line up with the (base, begin, size, type, name)
  // arguments of a mapper function call (see MapperFuncPtrTy).
  void *Base;
  void *Begin;
  int64_t Size;
  int64_t Type;
  void *Name;

  // The defaulted constructor does not assign any field itself.
  MapComponentInfoTy() = default;

  // Stores each argument in the field of the same name.
  MapComponentInfoTy(void *Base, void *Begin, int64_t Size, int64_t Type,
                     void *Name)
      : Base(Base), Begin(Begin), Size(Size), Type(Type), Name(Name) {}
};
// This structure stores all components of a user-defined mapper. The number of
// components is decided dynamically, so they are kept in an
// llvm::SmallVector.
struct MapperComponentsTy {
  llvm::SmallVector<MapComponentInfoTy> Components;

  // Number of components currently stored, converted to int32_t.
  int32_t size() { return Components.size(); }
};
// Pointer type for a user-defined mapper function. The pointed-to function is
// described as having this shape:
//   void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
//                                             void *base, void *begin,
//                                             size_t size, int64_t type,
//                                             void *name);
// In the pointer type itself the size argument is spelled int64_t.
using MapperFuncPtrTy = void (*)(void *, void *, void *, int64_t, int64_t,
                                 void *);
// Function pointer type for targetData* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
map_var_info_t *, void **, AsyncInfoTy &,
// Implemented in libomp, they are called from within __tgt_* functions.
#ifdef __cplusplus
extern "C" {
* The ident structure that describes a source location.
* The struct is identical to the one in the kmp.h file.
* We maintain the same data structure for compatibility.
typedef int kmp_int32;
typedef intptr_t kmp_intptr_t;
// Compiler sends us this info:
typedef struct kmp_depend_info {
kmp_intptr_t base_addr;
size_t len;
struct {
bool in : 1;
bool out : 1;
bool mtx : 1;
} flags;
} kmp_depend_info_t;
// functions that extract info from libomp; keep in sync
int omp_get_default_device(void) __attribute__((weak));
int32_t __kmpc_global_thread_num(void *) __attribute__((weak));
int __kmpc_get_target_offload(void) __attribute__((weak));
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
kmp_depend_info_t *noalias_dep_list)
void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid)
bool __kmpc_omp_has_task_team(kmp_int32 gtid) __attribute__((weak));
// Invalid GTID as defined by libomp; keep in sync
#define KMP_GTID_DNE (-2)
#ifdef __cplusplus
// Name token for this library.
// NOTE(review): presumably consumed by the debug/info output macros pulled in
// from Debug.h -- confirm.
#define TARGET_NAME Libomptarget
/// dump a table of all the host-target pointer pairs on failure
static inline void dumpTargetPointerMappings(const ident_t *Loc,
DeviceTy &Device) {
DeviceTy::HDTTMapAccessorTy HDTTMap =
if (HDTTMap->empty())
SourceInfo Kernel(Loc);
"OpenMP Host-Device pointer mappings after block at %s:%d:%d:\n",
Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn());
INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s %s\n",
"Host Ptr", "Target Ptr", "Size (B)", "DynRefCount", "HoldRefCount",
for (const auto &It : *HDTTMap) {
HostDataToTargetTy &HDTT = *It.HDTT;
SourceInfo Info(HDTT.HstPtrName);
DPxMOD " " DPxMOD " %-8" PRIuPTR " %-11s %-12s %s at %s:%d:%d\n",
DPxPTR(HDTT.HstPtrBegin), DPxPTR(HDTT.TgtPtrBegin),
HDTT.HstPtrEnd - HDTT.HstPtrBegin, HDTT.dynRefCountToStr().c_str(),
HDTT.holdRefCountToStr().c_str(), Info.getName(), Info.getFilename(),
Info.getLine(), Info.getColumn());
/// Print out the names and properties of the arguments to each kernel
static inline void
printKernelArguments(const ident_t *Loc, const int64_t DeviceId,
const int32_t ArgNum, const int64_t *ArgSizes,
const int64_t *ArgTypes, const map_var_info_t *ArgNames,
const char *RegionType) {
SourceInfo Info(Loc);
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s at %s:%d:%d with %d arguments:\n",
RegionType, Info.getFilename(), Info.getLine(), Info.getColumn(),
for (int32_t I = 0; I < ArgNum; ++I) {
const map_var_info_t VarName = (ArgNames) ? ArgNames[I] : nullptr;
const char *Type = nullptr;
const char *Implicit =
(ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
Type = "tofrom";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO)
Type = "to";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
Type = "from";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)
Type = "private";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL)
Type = "firstprivate";
else if (ArgSizes[I] != 0)
Type = "alloc";
Type = "use_address";
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", Type,
getNameFromMapping(VarName).c_str(), ArgSizes[I], Implicit);
// Wrapper for task stored async info objects.
class TaskAsyncInfoWrapperTy {
const int ExecThreadID = KMP_GTID_DNE;
AsyncInfoTy LocalAsyncInfo;
AsyncInfoTy *AsyncInfo = &LocalAsyncInfo;
void **TaskAsyncInfoPtr = nullptr;
TaskAsyncInfoWrapperTy(DeviceTy &Device)
: ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) {
// If we failed to acquired the current global thread id, we cannot
// re-enqueue the current task. Thus we should use the local blocking async
// info.
if (ExecThreadID == KMP_GTID_DNE)
// Only tasks with an assigned task team can be re-enqueue and thus can
// use the non-blocking synchronization scheme. Thus we should use the local
// blocking async info, if we donĀ“t have one.
if (!__kmpc_omp_has_task_team(ExecThreadID))
// Acquire a pointer to the AsyncInfo stored inside the current task being
// executed.
TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID);
// If we cannot acquire such pointer, fallback to using the local blocking
// async info.
if (!TaskAsyncInfoPtr)
// When creating a new task async info, the task handle must always be
// invalid. We must never overwrite any task async handle and there should
// never be any valid handle store inside the task at this point.
assert((*TaskAsyncInfoPtr) == nullptr &&
"Task async handle is not empty when dispatching new device "
"operations. The handle was not cleared properly or "
"__tgt_target_nowait_query should have been called!");
// If no valid async handle is present, a new AsyncInfo will be allocated
// and stored in the current task.
AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING);
*TaskAsyncInfoPtr = (void *)AsyncInfo;
~TaskAsyncInfoWrapperTy() {
// Local async info destruction is automatically handled by ~AsyncInfoTy.
if (AsyncInfo == &LocalAsyncInfo)
// If the are device operations still pending, return immediately without
// deallocating the handle.
if (!AsyncInfo->isDone())
// Delete the handle and unset it from the OpenMP task data.
delete AsyncInfo;
*TaskAsyncInfoPtr = nullptr;
operator AsyncInfoTy &() { return *AsyncInfo; }
// Implement exponential backoff counting.
// Linearly increments until given maximum, exponentially decrements based on
// given backoff factor.
class ExponentialBackoff {
  int64_t Count = 0;
  const int64_t MaxCount = 0;
  const int64_t CountThreshold = 0;
  const float BackoffFactor = 0.0f;

public:
  // All three arguments are validated by assertions only: MaxCount and
  // CountThreshold must be non-negative and BackoffFactor must lie in [0, 1).
  ExponentialBackoff(int64_t MaxCount, int64_t CountThreshold,
                     float BackoffFactor)
      : MaxCount(MaxCount), CountThreshold(CountThreshold),
        BackoffFactor(BackoffFactor) {
    assert(MaxCount >= 0 &&
           "ExponentialBackoff: maximum count value should be non-negative");
    assert(CountThreshold >= 0 &&
           "ExponentialBackoff: count threshold value should be non-negative");
    assert(BackoffFactor >= 0 && BackoffFactor < 1 &&
           "ExponentialBackoff: backoff factor should be in [0, 1) interval");
  }

  // Adds one to the counter, saturating at MaxCount.
  void increment() { Count = std::min(Count + 1, MaxCount); }

  // Scales the counter by BackoffFactor; the product is converted back to an
  // integer, dropping any fractional part.
  void decrement() { Count *= BackoffFactor; }

  // True when the counter is strictly greater than CountThreshold.
  bool isAboveThreshold() const { return Count > CountThreshold; }
};
// Time-profiling helpers built on llvm::TimeTraceScope.
// NOTE(review): the two `SourceInfo SI(IDENT);` fragments below read like the
// bodies of function-like macros taking IDENT (and NAME) parameters, but no
// #define line introduces them in this view, and TIMESCOPE() is defined twice
// with no visible conditional between the two definitions. The preprocessor
// lines that originally separated these appear to be missing -- restore them
// from the upstream file before relying on this section.
#include "llvm/Support/TimeProfiler.h"
#define TIMESCOPE() llvm::TimeTraceScope TimeScope(__FUNCTION__)
SourceInfo SI(IDENT); \
llvm::TimeTraceScope TimeScope(__FUNCTION__, SI.getProfileLocation())
SourceInfo SI(IDENT); \
llvm::TimeTraceScope TimeScope(NAME, SI.getProfileLocation())
#define TIMESCOPE()