// blob: 1a1db63ceb717d7acdda60d0c25abb7803bdf1e9 [file] [log] [blame] [edit]
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefixes=CHECK,HOST %s
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefixes=CHECK,DEV %s
#include "Inputs/cuda.h"
// CHECK: %class.anon = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.0 = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.1 = type { ptr, ptr, ptr }
// CHECK: %class.anon.2 = type { ptr, float, ptr, ptr }
// HOST: call void @_ZN8DevByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon)
// DEV: define amdgpu_kernel void @_ZN8DevByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon)
// Only the device function passes arguments by value.
namespace DevByVal {
// Device overload of fun: both parameters are taken by value. Returns x; y is
// ignored.
__device__ float fun(float x, float y) {
return x;
}
// Host overload of fun: both parameters are taken by const reference. Returns
// x; y is ignored.
float fun(const float &x, const float &y) {
return x;
}
// Kernel template: receives a callable by value and invokes it once with the
// argument 1.
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
// Builds a host/device lambda that feeds the constexpr local `small` to fun()
// and launches kernel with that lambda as its by-value argument.
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
// [=] copies fl, A and Vf into the closure. The closure type expected for
// this namespace at the top of the file lists one float next to three
// pointers, presumably the captured copy of `small`. The same layout is
// expected for the host and the device compilation.
// The lambda parameter n is unused.
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9HostByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.0)
// DEV: define amdgpu_kernel void @_ZN9HostByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.0)
// Only the host function passes arguments by value.
namespace HostByVal {
// Host overload of fun: both parameters are taken by value. Returns x; y is
// ignored.
float fun(float x, float y) {
return x;
}
// Device overload of fun: both parameters are taken by const reference.
// Returns x; y is ignored.
__device__ float fun(const float &x, const float &y) {
return x;
}
// Kernel template: receives a callable by value and invokes it once with the
// argument 1.
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
// Builds a host/device lambda that feeds the constexpr local `small` to fun()
// and launches kernel with that lambda as its by-value argument.
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
// [=] copies fl, A and Vf into the closure. The closure type expected for
// this namespace at the top of the file lists one float next to three
// pointers, presumably the captured copy of `small`. The same layout is
// expected for the host and the device compilation, although only the
// device overload of fun takes its arguments by reference here.
// The lambda parameter n is unused.
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9BothByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.1)
// DEV: define amdgpu_kernel void @_ZN9BothByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.1)
// Both the host and device functions pass arguments by value.
namespace BothByVal {
// Host overload of fun: both parameters are taken by value. Returns x; y is
// ignored.
float fun(float x, float y) {
return x;
}
// Device overload of fun: both parameters are taken by value. Returns x; y is
// ignored.
__device__ float fun(float x, float y) {
return x;
}
// Kernel template: receives a callable by value and invokes it once with the
// argument 1.
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
// Builds a host/device lambda that feeds the constexpr local `small` to fun()
// and launches kernel with that lambda as its by-value argument.
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
// [=] copies fl, A and Vf into the closure. The closure type expected for
// this namespace at the top of the file lists three pointers and no float,
// so `small` is apparently not expected to be captured when every overload
// of fun takes its arguments by value.
// The lambda parameter n is unused.
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN12NeitherByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.2)
// DEV: define amdgpu_kernel void @_ZN12NeitherByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.2)
// Neither the host nor device function passes arguments by value.
namespace NeitherByVal {
// Host overload of fun: both parameters are taken by const reference. Returns
// x; y is ignored.
float fun(const float& x, const float& y) {
return x;
}
// Device overload of fun: both parameters are taken by const reference.
// Returns x; y is ignored.
__device__ float fun(const float& x, const float& y) {
return x;
}
// Kernel template: receives a callable by value and invokes it once with the
// argument 1.
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
// Builds a host/device lambda that feeds the constexpr local `small` to fun()
// and launches kernel with that lambda as its by-value argument.
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
// [=] copies fl, A and Vf into the closure. The closure type expected for
// this namespace at the top of the file lists one float next to three
// pointers, presumably the captured copy of `small`, which both overloads
// of fun bind to a const reference.
// The lambda parameter n is unused.
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}