| // RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple x86_64-linux-gnu \ |
| // RUN: | FileCheck -check-prefixes=CHECK,HOST %s |
| // RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple amdgcn-amd-amdhsa -fcuda-is-device \ |
| // RUN: | FileCheck -check-prefixes=CHECK,DEV %s |
| |
| #include "Inputs/cuda.h" |
| |
| // CHECK: %class.anon = type { ptr, float, ptr, ptr } |
| // CHECK: %class.anon.0 = type { ptr, float, ptr, ptr } |
| // CHECK: %class.anon.1 = type { ptr, ptr, ptr } |
| // CHECK: %class.anon.2 = type { ptr, float, ptr, ptr } |
| |
| // HOST: call void @_ZN8DevByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon) |
| // DEV: define amdgpu_kernel void @_ZN8DevByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon) |
| |
| // Only the device function passes arugments by value. |
| namespace DevByVal { |
| __device__ float fun(float x, float y) { |
| return x; |
| } |
| |
| float fun(const float &x, const float &y) { |
| return x; |
| } |
| |
| template<typename F> |
| void __global__ kernel(F f) |
| { |
| f(1); |
| } |
| |
| void test(float const * fl, float const * A, float * Vf) |
| { |
| float constexpr small(1.0e-25); |
| |
| auto lambda = [=] __device__ __host__ (unsigned int n) { |
| float const value = fun(small, fl[0]); |
| Vf[0] = value * A[0]; |
| }; |
| kernel<<<1, 1>>>(lambda); |
| } |
| } |
| |
| // HOST: call void @_ZN9HostByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.0) |
| // DEV: define amdgpu_kernel void @_ZN9HostByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.0) |
| |
| // Only the host function passes arugments by value. |
| namespace HostByVal { |
| float fun(float x, float y) { |
| return x; |
| } |
| |
| __device__ float fun(const float &x, const float &y) { |
| return x; |
| } |
| |
| template<typename F> |
| void __global__ kernel(F f) |
| { |
| f(1); |
| } |
| |
| void test(float const * fl, float const * A, float * Vf) |
| { |
| float constexpr small(1.0e-25); |
| |
| auto lambda = [=] __device__ __host__ (unsigned int n) { |
| float const value = fun(small, fl[0]); |
| Vf[0] = value * A[0]; |
| }; |
| kernel<<<1, 1>>>(lambda); |
| } |
| } |
| |
| // HOST: call void @_ZN9BothByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.1) |
| // DEV: define amdgpu_kernel void @_ZN9BothByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.1) |
| |
| // Both the host and device functions pass arugments by value. |
| namespace BothByVal { |
| float fun(float x, float y) { |
| return x; |
| } |
| |
| __device__ float fun(float x, float y) { |
| return x; |
| } |
| |
| template<typename F> |
| void __global__ kernel(F f) |
| { |
| f(1); |
| } |
| |
| void test(float const * fl, float const * A, float * Vf) |
| { |
| float constexpr small(1.0e-25); |
| |
| auto lambda = [=] __device__ __host__ (unsigned int n) { |
| float const value = fun(small, fl[0]); |
| Vf[0] = value * A[0]; |
| }; |
| kernel<<<1, 1>>>(lambda); |
| } |
| } |
| |
| // HOST: call void @_ZN12NeitherByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.2) |
| // DEV: define amdgpu_kernel void @_ZN12NeitherByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.2) |
| |
| // Neither the host nor device function passes arugments by value. |
| namespace NeitherByVal { |
| float fun(const float& x, const float& y) { |
| return x; |
| } |
| |
| __device__ float fun(const float& x, const float& y) { |
| return x; |
| } |
| |
| template<typename F> |
| void __global__ kernel(F f) |
| { |
| f(1); |
| } |
| |
| void test(float const * fl, float const * A, float * Vf) |
| { |
| float constexpr small(1.0e-25); |
| |
| auto lambda = [=] __device__ __host__ (unsigned int n) { |
| float const value = fun(small, fl[0]); |
| Vf[0] = value * A[0]; |
| }; |
| kernel<<<1, 1>>>(lambda); |
| } |
| } |