| |
| #include <iostream> |
| |
| #include <cublas_v2.h> |
| #include <cuda_runtime.h> |
| |
| #if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H) |
| |
| # include <pthread.h> |
// Calls pthread_self() so that this translation unit references a symbol
// from the pthread library, and returns the calling thread's id converted
// to int.
static int verify_linking_to_pthread()
{
  const pthread_t current_thread = pthread_self();
  return static_cast<int>(current_thread);
}
| #endif |
| |
// This test only makes sense for versions of CUDA that ship
// static libraries that have separable compilation device symbols.
| #if __CUDACC_VER_MAJOR__ <= 9 |
// Kernel that creates a cuBLAS handle on the device, issues a single
// cublasSgemm call, and destroys the handle again.
//
//   n            passed as m, n and k and as every leading dimension
//   alpha, beta  scalars handed to cublasSgemm by address
//   d_A, d_B     input matrix pointers forwarded to cublasSgemm
//   d_C          output matrix pointer forwarded to cublasSgemm
//
// If the handle cannot be created the kernel returns without doing anything.
// The status of the cublasSgemm call is not reported back to the caller.
__global__ void deviceCublasSgemm(int n, float alpha, float beta,
                                  const float* d_A, const float* d_B,
                                  float* d_C)
{
  cublasHandle_t handle;
  if (cublasCreate(&handle) == CUBLAS_STATUS_SUCCESS) {
    // cublasSgemm is defined in the cublas_device system static library.
    // Calling it verifies that system libraries are properly passed to the
    // device link line.
    const cublasStatus_t gemmStatus =
      cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, d_A, n,
                  d_B, n, &beta, d_C, n);
    (void)gemmStatus; // intentionally ignored, as in the rest of this kernel

    cublasDestroy(handle);
  }
}
| #endif |
| |
// Selects the first CUDA device whose compute capability is 3.5 or newer.
//
// Returns 0 as soon as cudaSetDevice succeeds for such a device.
// Returns 1 when the device count or the properties of a device cannot be
// queried, or when no suitable device could be selected.
int choose_cuda_device()
{
  int deviceCount = 0;
  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int device = 0; device < deviceCount; ++device) {
    cudaDeviceProp properties;
    if (cudaGetDeviceProperties(&properties, device) != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << device
                << std::endl;
      return 1;
    }

    // Skip devices below compute capability 3.5.
    const bool supported = properties.major > 3 ||
      (properties.major == 3 && properties.minor >= 5);
    if (!supported) {
      continue;
    }

    if (cudaSetDevice(device) == cudaSuccess) {
      return 0;
    }

    // Selection failed: report it and keep trying the remaining devices.
    std::cout << "Could not select CUDA device " << device << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
            << std::endl;
  return 1;
}
| |
// Entry point: selects a usable CUDA device and, when compiled with
// CUDA <= 9, launches the kernel that calls into the cublas_device static
// library.
//
// Returns 0 on success, and also when choose_cuda_device() reports that no
// device could be selected (presumably so machines without a suitable GPU do
// not fail the test -- confirm against the test harness).
// Returns 1 when the kernel launch or its execution reports a CUDA error.
int main(int argc, char** argv)
{
  // The command line is not used.
  (void)argc;
  (void)argv;

  if (choose_cuda_device() != 0) {
    return 0;
  }

#if __CUDACC_VER_MAJOR__ <= 9
  // initial values that will make sure that the cublasSgemm won't actually
  // do any work
  int n = 0;
  float alpha = 1.0f;
  float beta = 1.0f;
  float* d_A = nullptr;
  float* d_B = nullptr;
  float* d_C = nullptr;
  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);

  // A kernel launch does not return a status itself; launch errors have to
  // be fetched explicitly with cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "Failed to launch deviceCublasSgemm: "
              << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  // Wait for the kernel to finish so that errors raised while it executes
  // are observed before the process exits.
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    std::cerr << "deviceCublasSgemm failed during execution: "
              << cudaGetErrorString(err) << std::endl;
    return 1;
  }
#endif

  return 0;
}