diff options
Diffstat (limited to 'nerv/lib/matrix/cuda_helper.h')
-rw-r--r-- | nerv/lib/matrix/cuda_helper.h | 60 |
1 files changed, 49 insertions, 11 deletions
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h index 13d5728..2d18486 100644 --- a/nerv/lib/matrix/cuda_helper.h +++ b/nerv/lib/matrix/cuda_helper.h @@ -54,19 +54,26 @@ cudaDeviceSynchronize(); \ } while (0) -#define CHECK_SAME_DIMENSION(a, b, status) \ +#define CURAND_SAFE_SYNC_CALL(call, status) \ do { \ - if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ - NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); \ + curandStatus_t err = (call); \ + if (err != CURAND_STATUS_SUCCESS) \ + { \ + NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \ + return; \ + } \ + cudaDeviceSynchronize(); \ } while (0) -#define CHECK_SAME_DIMENSION_RET(a, b, status) \ +#define CURAND_SAFE_SYNC_CALL_RET(call, status) \ do { \ - if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ + curandStatus_t err = (call); \ + if (err != CURAND_STATUS_SUCCESS) \ { \ - NERV_SET_STATUS(status, MAT_MISMATCH_DIM, 0); \ + NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \ return 0; \ } \ + cudaDeviceSynchronize(); \ } while (0) static const char *cublasGetErrorString(cublasStatus_t err) { @@ -96,15 +103,46 @@ static const char *cublasGetErrorString(cublasStatus_t err) { return "<unknown>"; } +static const char *curandGetErrorString(curandStatus_t err) { + switch (err) + { + case CURAND_STATUS_VERSION_MISMATCH: + return "Header file and linked library version do not match"; + case CURAND_STATUS_NOT_INITIALIZED: + return "Generator not initialized"; + case CURAND_STATUS_ALLOCATION_FAILED: + return "Memory allocation failed"; + case CURAND_STATUS_TYPE_ERROR: + return "Generator is wrong type"; + case CURAND_STATUS_OUT_OF_RANGE: + return "Argument out of range"; + case CURAND_STATUS_LENGTH_NOT_MULTIPLE: + return "Length requested is not a multple of dimension"; + case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: + return "GPU does not have double precision required by MRG32k3a"; + case CURAND_STATUS_LAUNCH_FAILURE: + return "Kernel launch failure"; + case CURAND_STATUS_PREEXISTING_FAILURE: + return "Preexisting failure on library entry"; + case CURAND_STATUS_INITIALIZATION_FAILED: + return "Initialization of CUDA failed"; + case CURAND_STATUS_ARCH_MISMATCH: + return "Architecture mismatch, GPU does not support requested feature"; + case CURAND_STATUS_INTERNAL_ERROR: + return "Internal library error"; + } + return "<unknown>"; +} #define PROFILE_START \ do { \ - cudaEventRecord(profile_start, 0); + cudaEventRecord(context->profile_start, 0); #define PROFILE_STOP \ - cudaEventRecord(profile_stop, 0); \ - cudaEventSynchronize(profile_stop); \ + cudaEventRecord(context->profile_stop, 0); \ + cudaEventSynchronize(context->profile_stop); \ float milliseconds = 0; \ - cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \ - accu_profile(__func__, milliseconds / 1000); \ + cudaEventElapsedTime(&milliseconds, context->profile_start, \ + context->profile_stop); \ + nerv_cuda_context_accu_profile(context, __func__, milliseconds / 1000); \ } while (0); #define PROFILE_END |