Make omp.target[_{enter,exit}]_data and omp.target_update host-only ops
diff --git a/flang/test/Integration/OpenMP/target-use-device-nested.f90 b/flang/test/Integration/OpenMP/target-use-device-nested.f90
new file mode 100644
index 0000000..9bb4c39
--- /dev/null
+++ b/flang/test/Integration/OpenMP/target-use-device-nested.f90
@@ -0,0 +1,46 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! This test checks that target code nested inside a target data region which
+! has only use_device_ptr mapping correctly generates code on the device pass.
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+program main
+ use iso_c_binding
+ implicit none
+ type(c_ptr) :: a
+ !$omp target data use_device_ptr(a)
+ !$omp target map(tofrom: a)
+ call foo(a)
+ !$omp end target
+ !$omp end target data
+end program
+
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
+! CHECK-NEXT: entry:
+! CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK-NEXT: %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
+! CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
+! CHECK-NEXT: %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_{{.*}}_kernel_environment to ptr), ptr %[[VAL_6:.*]])
+! CHECK-NEXT: %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
+! CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
+! CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
+! CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
+! CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
+
+! CHECK: [[AFTER_ALLOC]]:
+! CHECK-NEXT: br label %[[VAL_12:.*]]
+
+! CHECK: [[VAL_12]]:
+! CHECK-NEXT: br label %[[TARGET_REG_ENTRY:.*]]
+
+! CHECK: [[TARGET_REG_ENTRY]]: ; preds = %[[VAL_12]]
+! CHECK-NEXT: call void @{{.*}}foo{{.*}}(ptr %[[VAL_11]])
+! CHECK-NEXT: br label
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 50ab206..8d72194 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7080,17 +7080,6 @@
return InsertPointTy();
Builder.restoreIP(CodeGenIP);
- // Disable TargetData CodeGen on Device pass.
- if (Config.IsTargetDevice.value_or(false)) {
- if (BodyGenCB) {
- InsertPointOrErrorTy AfterIP =
- BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
- if (!AfterIP)
- return AfterIP.takeError();
- Builder.restoreIP(*AfterIP);
- }
- return Builder.saveIP();
- }
bool IsStandAlone = !BodyGenCB;
MapInfosTy *MapInfo;
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index b7a060b..a4558e5 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6281,27 +6281,6 @@
EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
- // Check that BodyGenCB is still made when IsTargetDevice is set to true.
- OMPBuilder.Config.setIsTargetDevice(true);
- bool CheckDevicePassBodyGen = false;
- auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
- CheckDevicePassBodyGen = true;
- Builder.restoreIP(CodeGenIP);
- CallInst *TargetDataCall =
- dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
- // Make sure no begin_mapper call is present for device pass.
- EXPECT_EQ(TargetDataCall, nullptr);
- return Builder.saveIP();
- };
- ASSERT_EXPECTED_INIT(
- OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
- OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
- Builder.getInt64(DeviceID),
- /* IfCond= */ nullptr, Info, GenMapInfoCB,
- CustomMapperCB, nullptr, BodyTargetCB));
- Builder.restoreIP(TargetDataIP2);
- EXPECT_TRUE(CheckDevicePassBodyGen);
-
Builder.CreateRetVoid();
EXPECT_FALSE(verifyModule(*M, &errs()));
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8323ca97a1..efd9461 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4510,8 +4510,8 @@
llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
/*SeparateBeginEndCalls=*/true);
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
- bool isOffloadEntry =
- isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+ assert(!isTargetDevice && "target data/enter/exit/update are host ops");
+ bool isOffloadEntry = !ompBuilder->Config.TargetTriples.empty();
LogicalResult result =
llvm::TypeSwitch<Operation *, LogicalResult>(op)
@@ -4687,30 +4687,17 @@
if (info.DevicePtrInfoMap.empty()) {
// For host device we still need to do the mapping for codegen,
// otherwise it may try to lookup a missing value.
- if (!ompBuilder->Config.IsTargetDevice.value_or(false)) {
- mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
- blockArgIface.getUseDeviceAddrBlockArgs(),
- useDeviceAddrVars, mapData);
- mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
- blockArgIface.getUseDevicePtrBlockArgs(),
- useDevicePtrVars, mapData);
- }
+ mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
+ blockArgIface.getUseDeviceAddrBlockArgs(),
+ useDeviceAddrVars, mapData);
+ mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
+ blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
+ mapData);
}
break;
case BodyGenTy::NoPriv:
// If device info is available then region has already been generated
if (info.DevicePtrInfoMap.empty()) {
- // For device pass, if use_device_ptr(addr) mappings were present,
- // we need to link them here before codegen.
- if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
- mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
- blockArgIface.getUseDeviceAddrBlockArgs(),
- useDeviceAddrVars, mapData);
- mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
- blockArgIface.getUseDevicePtrBlockArgs(),
- useDevicePtrVars, mapData);
- }
-
if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
moduleTranslation)))
return llvm::make_error<PreviouslyReportedError>();
@@ -6086,9 +6073,8 @@
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
if (ompBuilder->Config.isTargetDevice() &&
- !isa<omp::TargetOp, omp::TargetDataOp, omp::TargetEnterDataOp,
- omp::TargetExitDataOp, omp::TargetUpdateOp, omp::MapInfoOp,
- omp::TerminatorOp, omp::YieldOp>(op) &&
+ !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
+ op) &&
isHostDeviceOp(op))
return op->emitOpError() << "unsupported host op found in device";
diff --git a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
deleted file mode 100644
index 9c6b06e..0000000
--- a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
+++ /dev/null
@@ -1,46 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// This tests check that target code nested inside a target data region which
-// has only use_device_ptr mapping corectly generates code on the device pass.
-
-// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
-// CHECK-NEXT: entry:
-// CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
-// CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
-// CHECK-NEXT: %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_{{.*}}_kernel_environment, ptr %[[VAL_6:.*]])
-// CHECK-NEXT: %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
-// CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
-// CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
-// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
-// CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
-
-// CHECK: [[AFTER_ALLOC]]:
-// CHECK-NEXT: br label %[[VAL_12:.*]]
-
-// CHECK: [[VAL_12]]:
-// CHECK-NEXT: br label %[[TARGET_REG_ENTRY:.*]]
-
-// CHECK: [[TARGET_REG_ENTRY]]: ; preds = %[[VAL_12]]
-// CHECK-NEXT: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8
-// CHECK-NEXT: store i32 999, ptr %[[VAL_13]], align 4
-// CHECK-NEXT: br label %[[VAL_14:.*]]
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
- llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
- %0 = llvm.mlir.constant(1 : i64) : i64
- %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr<5>
- %ascast = llvm.addrspacecast %a : !llvm.ptr<5> to !llvm.ptr
- %map = omp.map.info var_ptr(%ascast : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- omp.target_data use_device_ptr(%map -> %arg0 : !llvm.ptr) {
- %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- omp.target map_entries(%map1 -> %arg1 : !llvm.ptr){
- %1 = llvm.mlir.constant(999 : i32) : i32
- %2 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr
- llvm.store %1, %2 : i32, !llvm.ptr
- omp.terminator
- }
- omp.terminator
- }
- llvm.return
- }
-}