blob: 0a8d7a9d231ae60c953c829eadeb746d17f0d1ca [file] [log] [blame]
; RUN: opt -S -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s
; RUN: opt -S -openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s
; C input used for this test:
; void bar(void) {
; #pragma omp parallel
; { }
; }
; void foo(void) {
; #pragma omp target teams
; {
; #pragma omp parallel
; {}
; bar();
; #pragma omp parallel
; {}
; }
; }
; Verify we replace the function pointer uses for the first and last outlined
; region (1 and 3) but not for the middle one (2) because it could be called from
; another kernel.
; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; CHECK-DAG: @__omp_outlined__3_wrapper.ID = private constant i8 undef
; CHECK-DAG: icmp eq i8* %5, @__omp_outlined__1_wrapper.ID
; CHECK-DAG: icmp eq i8* %7, @__omp_outlined__3_wrapper.ID
; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__1_wrapper.ID)
; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*))
; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__3_wrapper.ID)
%struct.ident_t = type { i32, i32, i32, i32, i8* }
define internal void @__omp_offloading_35_a1e179_foo_l7_worker() {
entry:
%work_fn = alloca i8*, align 8
%exec_status = alloca i8, align 1
store i8* null, i8** %work_fn, align 8
store i8 0, i8* %exec_status, align 1
br label %.await.work
.await.work: ; preds = %.barrier.parallel, %entry
call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
%0 = call i1 @__kmpc_kernel_parallel(i8** %work_fn)
%1 = zext i1 %0 to i8
store i8 %1, i8* %exec_status, align 1
%2 = load i8*, i8** %work_fn, align 8
%should_terminate = icmp eq i8* %2, null
br i1 %should_terminate, label %.exit, label %.select.workers
.select.workers: ; preds = %.await.work
%3 = load i8, i8* %exec_status, align 1
%is_active = icmp ne i8 %3, 0
br i1 %is_active, label %.execute.parallel, label %.barrier.parallel
.execute.parallel: ; preds = %.select.workers
%4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null)
%5 = load i8*, i8** %work_fn, align 8
%work_match = icmp eq i8* %5, bitcast (void ()* @__omp_outlined__1_wrapper to i8*)
br i1 %work_match, label %.execute.fn, label %.check.next
.execute.fn: ; preds = %.execute.parallel
call void @__omp_outlined__1_wrapper()
br label %.terminate.parallel
.check.next: ; preds = %.execute.parallel
%6 = load i8*, i8** %work_fn, align 8
%work_match1 = icmp eq i8* %6, bitcast (void ()* @__omp_outlined__2_wrapper to i8*)
br i1 %work_match1, label %.execute.fn2, label %.check.next3
.execute.fn2: ; preds = %.check.next
call void @__omp_outlined__2_wrapper()
br label %.terminate.parallel
.check.next3: ; preds = %.check.next
%7 = load i8*, i8** %work_fn, align 8
%work_match4 = icmp eq i8* %7, bitcast (void ()* @__omp_outlined__3_wrapper to i8*)
br i1 %work_match4, label %.execute.fn5, label %.check.next6
.execute.fn5: ; preds = %.check.next3
call void @__omp_outlined__3_wrapper()
br label %.terminate.parallel
.check.next6: ; preds = %.check.next3
%8 = bitcast i8* %2 to void ()*
call void %8()
br label %.terminate.parallel
.terminate.parallel: ; preds = %.check.next6, %.execute.fn5, %.execute.fn2, %.execute.fn
call void @__kmpc_kernel_end_parallel()
br label %.barrier.parallel
.barrier.parallel: ; preds = %.terminate.parallel, %.select.workers
call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
br label %.await.work
.exit: ; preds = %.await.work
ret void
}
define weak void @__omp_offloading_35_a1e179_foo_l7() {
call void @__omp_offloading_35_a1e179_foo_l7_worker()
call void @__omp_outlined__()
ret void
}
define internal void @__omp_outlined__() {
call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__1_wrapper to i8*))
call void @bar()
call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__3_wrapper to i8*))
ret void
}
define internal void @__omp_outlined__1() {
ret void
}
define internal void @__omp_outlined__1_wrapper() {
call void @__omp_outlined__1()
ret void
}
define hidden void @bar() {
call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*))
ret void
}
define internal void @__omp_outlined__2_wrapper() {
ret void
}
define internal void @__omp_outlined__3_wrapper() {
ret void
}
declare void @__kmpc_kernel_prepare_parallel(i8* %WorkFn)
declare zeroext i1 @__kmpc_kernel_parallel(i8** nocapture %WorkFn)
declare void @__kmpc_kernel_end_parallel()
declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture readnone %loc_ref, i32 %tid)
declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture readnone)
!nvvm.annotations = !{!0}
!0 = !{void ()* @__omp_offloading_35_a1e179_foo_l7, !"kernel", i32 1}