#ifndef NERV_CUDA_HELPER_H
#define NERV_CUDA_HELPER_H
#include "cuda.h"
#include "cuda_runtime.h"
#include "driver_types.h"
#include "cublas_v2.h"
#include "curand.h"
/*
 * Evaluate a cuBLAS call, then wait for the device with
 * cudaDeviceSynchronize().
 *
 * call   - expression yielding a cublasStatus_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_SET_STATUS
 *
 * If the call does not return CUBLAS_STATUS_SUCCESS, the failure is recorded
 * with code MAT_CUBLAS_ERR and the enclosing function executes `return 0`
 * without synchronizing. The result of cudaDeviceSynchronize() is checked
 * too: a failure there is recorded with code MAT_CUDA_ERR and also makes the
 * enclosing function `return 0`. Only usable inside functions for which
 * `return 0` is valid.
 */
#define CUBLAS_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        cublasStatus_t cublas_rc_ = (call); \
        cudaError_t sync_rc_; \
        if (cublas_rc_ != CUBLAS_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, \
                            cublasGetErrorString(cublas_rc_)); \
            return 0; \
        } \
        /* errors raised while the work executes only show up here */ \
        sync_rc_ = cudaDeviceSynchronize(); \
        if (sync_rc_ != cudaSuccess) \
        { \
            NERV_SET_STATUS(status, MAT_CUDA_ERR, \
                            cudaGetErrorString(sync_rc_)); \
            return 0; \
        } \
    } while (0)
/*
 * Evaluate a cuBLAS call, then wait for the device with
 * cudaDeviceSynchronize().
 *
 * call   - expression yielding a cublasStatus_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_EXIT_STATUS
 *
 * A result other than CUBLAS_STATUS_SUCCESS is handed to NERV_EXIT_STATUS
 * with code MAT_CUBLAS_ERR. The result of cudaDeviceSynchronize() is checked
 * as well and handed to NERV_EXIT_STATUS with code MAT_CUDA_ERR on failure.
 * NOTE(review): NERV_EXIT_STATUS is defined outside this header; it
 * presumably leaves the enclosing function - confirm.
 */
#define CUBLAS_SAFE_SYNC_CALL(call, status) \
    do { \
        cublasStatus_t cublas_rc_ = (call); \
        cudaError_t sync_rc_; \
        if (cublas_rc_ != CUBLAS_STATUS_SUCCESS) \
        { \
            NERV_EXIT_STATUS(status, MAT_CUBLAS_ERR, \
                             cublasGetErrorString(cublas_rc_)); \
        } \
        /* errors raised while the work executes only show up here */ \
        sync_rc_ = cudaDeviceSynchronize(); \
        if (sync_rc_ != cudaSuccess) \
        { \
            NERV_EXIT_STATUS(status, MAT_CUDA_ERR, \
                             cudaGetErrorString(sync_rc_)); \
        } \
    } while (0)
/*
 * Run a CUDA runtime call and bail out of the enclosing function on failure.
 *
 * call   - expression yielding a cudaError_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_SET_STATUS
 *
 * On any result other than cudaSuccess the error is recorded with code
 * MAT_CUDA_ERR and the text from cudaGetErrorString(), and the enclosing
 * function executes `return 0`. No device synchronization is performed.
 */
#define CUDA_SAFE_CALL_RET(call, status) \
    do { \
        cudaError_t cuda_rc_ = (call); \
        if (cudaSuccess != cuda_rc_) \
        { \
            NERV_SET_STATUS(status, MAT_CUDA_ERR, \
                            cudaGetErrorString(cuda_rc_)); \
            return 0; \
        } \
    } while (0)
/*
 * Run a CUDA runtime call and report a failure through NERV_EXIT_STATUS.
 *
 * call   - expression yielding a cudaError_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_EXIT_STATUS
 *
 * On any result other than cudaSuccess, NERV_EXIT_STATUS is invoked with
 * code MAT_CUDA_ERR and the text from cudaGetErrorString(). No device
 * synchronization is performed.
 * NOTE(review): NERV_EXIT_STATUS is defined outside this header; it
 * presumably leaves the enclosing function - confirm.
 */
#define CUDA_SAFE_CALL(call, status) \
    do { \
        cudaError_t cuda_rc_ = (call); \
        if (cudaSuccess != cuda_rc_) \
        { \
            NERV_EXIT_STATUS(status, MAT_CUDA_ERR, \
                             cudaGetErrorString(cuda_rc_)); \
        } \
    } while (0)
/*
 * Run a CUDA runtime call through CUDA_SAFE_CALL, then synchronize the
 * device.
 *
 * call   - expression yielding a cudaError_t
 * status - forwarded to CUDA_SAFE_CALL
 *
 * The cudaDeviceSynchronize() that follows is itself wrapped in
 * CUDA_SAFE_CALL, so an error that only becomes visible at the
 * synchronization point is reported (code MAT_CUDA_ERR) instead of being
 * discarded.
 */
#define CUDA_SAFE_SYNC_CALL(call, status) \
    do { \
        CUDA_SAFE_CALL(call, status); \
        CUDA_SAFE_CALL(cudaDeviceSynchronize(), status); \
    } while (0)
/*
 * Run a CUDA runtime call through CUDA_SAFE_CALL_RET, then synchronize the
 * device.
 *
 * call   - expression yielding a cudaError_t
 * status - forwarded to CUDA_SAFE_CALL_RET
 *
 * The cudaDeviceSynchronize() that follows is itself wrapped in
 * CUDA_SAFE_CALL_RET, so an error that only becomes visible at the
 * synchronization point is recorded (code MAT_CUDA_ERR) and makes the
 * enclosing function `return 0` instead of being discarded.
 */
#define CUDA_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        CUDA_SAFE_CALL_RET(call, status); \
        CUDA_SAFE_CALL_RET(cudaDeviceSynchronize(), status); \
    } while (0)
/*
 * Evaluate a cuRAND call, then wait for the device with
 * cudaDeviceSynchronize(). For use in functions returning void.
 *
 * call   - expression yielding a curandStatus_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_SET_STATUS
 *
 * If the call does not return CURAND_STATUS_SUCCESS, the failure is recorded
 * and the enclosing function executes `return;` without synchronizing. The
 * result of cudaDeviceSynchronize() is checked too: a failure there is
 * recorded with code MAT_CUDA_ERR and also triggers `return;`.
 * NOTE(review): cuRAND failures are filed under MAT_CUBLAS_ERR; no
 * cuRAND-specific code is visible from this header - confirm this is
 * intended.
 */
#define CURAND_SAFE_SYNC_CALL(call, status) \
    do { \
        curandStatus_t curand_rc_ = (call); \
        cudaError_t sync_rc_; \
        if (curand_rc_ != CURAND_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, \
                            curandGetErrorString(curand_rc_)); \
            return; \
        } \
        /* errors raised while the work executes only show up here */ \
        sync_rc_ = cudaDeviceSynchronize(); \
        if (sync_rc_ != cudaSuccess) \
        { \
            NERV_SET_STATUS(status, MAT_CUDA_ERR, \
                            cudaGetErrorString(sync_rc_)); \
            return; \
        } \
    } while (0)
/*
 * Evaluate a cuRAND call, then wait for the device with
 * cudaDeviceSynchronize().
 *
 * call   - expression yielding a curandStatus_t; evaluated exactly once
 * status - forwarded as the first argument of NERV_SET_STATUS
 *
 * If the call does not return CURAND_STATUS_SUCCESS, the failure is recorded
 * and the enclosing function executes `return 0` without synchronizing. The
 * result of cudaDeviceSynchronize() is checked too: a failure there is
 * recorded with code MAT_CUDA_ERR and also triggers `return 0`. Only usable
 * inside functions for which `return 0` is valid.
 * NOTE(review): cuRAND failures are filed under MAT_CUBLAS_ERR; no
 * cuRAND-specific code is visible from this header - confirm this is
 * intended.
 */
#define CURAND_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        curandStatus_t curand_rc_ = (call); \
        cudaError_t sync_rc_; \
        if (curand_rc_ != CURAND_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, \
                            curandGetErrorString(curand_rc_)); \
            return 0; \
        } \
        /* errors raised while the work executes only show up here */ \
        sync_rc_ = cudaDeviceSynchronize(); \
        if (sync_rc_ != cudaSuccess) \
        { \
            NERV_SET_STATUS(status, MAT_CUDA_ERR, \
                            cudaGetErrorString(sync_rc_)); \
            return 0; \
        } \
    } while (0)
/*
 * Translate a cuBLAS status code into the name of its enumerator.
 *
 * err - status value returned by a cuBLAS routine
 *
 * Returns a pointer to a string literal (never NULL, must not be freed);
 * values without a matching case yield "<unknown>".
 */
static const char *cublasGetErrorString(cublasStatus_t err) {
    switch (err)
    {
        case CUBLAS_STATUS_SUCCESS:
            return "CUBLAS_STATUS_SUCCESS";
        case CUBLAS_STATUS_NOT_INITIALIZED:
            return "CUBLAS_STATUS_NOT_INITIALIZED";
        case CUBLAS_STATUS_ALLOC_FAILED:
            return "CUBLAS_STATUS_ALLOC_FAILED";
        case CUBLAS_STATUS_INVALID_VALUE:
            return "CUBLAS_STATUS_INVALID_VALUE";
        case CUBLAS_STATUS_ARCH_MISMATCH:
            return "CUBLAS_STATUS_ARCH_MISMATCH";
        case CUBLAS_STATUS_MAPPING_ERROR:
            return "CUBLAS_STATUS_MAPPING_ERROR";
        case CUBLAS_STATUS_EXECUTION_FAILED:
            return "CUBLAS_STATUS_EXECUTION_FAILED";
        case CUBLAS_STATUS_INTERNAL_ERROR:
            return "CUBLAS_STATUS_INTERNAL_ERROR";
/*
 * These two enumerators do not exist in old toolkits, and enumerators cannot
 * be probed with #ifdef, so they are gated on the toolkit version from
 * cuda.h. NOTE(review): 6050 (CUDA 6.5) is a conservative threshold - confirm
 * against the oldest toolkit this project supports.
 */
#if defined(CUDA_VERSION) && CUDA_VERSION >= 6050
        case CUBLAS_STATUS_NOT_SUPPORTED:
            return "CUBLAS_STATUS_NOT_SUPPORTED";
        case CUBLAS_STATUS_LICENSE_ERROR:
            return "CUBLAS_STATUS_LICENSE_ERROR";
#endif
        default:
            /* any other value falls through to the generic answer */
            break;
    }
    return "<unknown>";
}
/*
 * Translate a cuRAND status code into a human-readable description.
 *
 * err - status value returned by a cuRAND routine
 *
 * Returns a pointer to a string literal (never NULL, must not be freed);
 * values without a matching case yield "<unknown>".
 */
static const char *curandGetErrorString(curandStatus_t err) {
    switch (err)
    {
        /* handled explicitly so a success code is not reported as unknown */
        case CURAND_STATUS_SUCCESS:
            return "No errors";
        case CURAND_STATUS_VERSION_MISMATCH:
            return "Header file and linked library version do not match";
        case CURAND_STATUS_NOT_INITIALIZED:
            return "Generator not initialized";
        case CURAND_STATUS_ALLOCATION_FAILED:
            return "Memory allocation failed";
        case CURAND_STATUS_TYPE_ERROR:
            return "Generator is wrong type";
        case CURAND_STATUS_OUT_OF_RANGE:
            return "Argument out of range";
        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
            return "Length requested is not a multiple of dimension";
        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
            return "GPU does not have double precision required by MRG32k3a";
        case CURAND_STATUS_LAUNCH_FAILURE:
            return "Kernel launch failure";
        case CURAND_STATUS_PREEXISTING_FAILURE:
            return "Preexisting failure on library entry";
        case CURAND_STATUS_INITIALIZATION_FAILED:
            return "Initialization of CUDA failed";
        case CURAND_STATUS_ARCH_MISMATCH:
            return "Architecture mismatch, GPU does not support requested feature";
        case CURAND_STATUS_INTERNAL_ERROR:
            return "Internal library error";
        default:
            /* any other value falls through to the generic answer */
            break;
    }
    return "<unknown>";
}
/*
 * Timing bracket made of two macros that must be used as a pair:
 * PROFILE_START opens a `do {` block that PROFILE_STOP closes, so the code
 * being timed is written between them. Both expect a pointer named `context`
 * with `profile_start` and `profile_stop` event members to be in scope at
 * the point of use.
 */

/* Open the timed block and record the start event (stream argument 0). */
#define PROFILE_START \
    do { \
        cudaEventRecord(context->profile_start, 0);

/*
 * Record the stop event, wait for it, and pass the elapsed time between the
 * two events divided by 1000 to nerv_cuda_context_accu_profile(), keyed by
 * the enclosing function's name (__func__). The macro closes the block and
 * already ends in `;`, so the call site does not need one. Return values of
 * the cudaEvent* calls are not checked.
 */
#define PROFILE_STOP \
        cudaEventRecord(context->profile_stop, 0); \
        cudaEventSynchronize(context->profile_stop); \
        float profile_elapsed_ms = 0; \
        cudaEventElapsedTime(&profile_elapsed_ms, \
                             context->profile_start, \
                             context->profile_stop); \
        nerv_cuda_context_accu_profile(context, __func__, \
                                       profile_elapsed_ms / 1000); \
    } while (0);

/* Expands to nothing. */
#define PROFILE_END
#endif