aboutsummaryrefslogtreecommitdiff
path: root/nerv/lib/matrix/cuda_helper.h
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/lib/matrix/cuda_helper.h')
-rw-r--r--nerv/lib/matrix/cuda_helper.h63
1 files changed, 58 insertions, 5 deletions
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h
index 13d5728..5c75e38 100644
--- a/nerv/lib/matrix/cuda_helper.h
+++ b/nerv/lib/matrix/cuda_helper.h
@@ -54,6 +54,28 @@
cudaDeviceSynchronize(); \
} while (0)
+#define CURAND_SAFE_SYNC_CALL(call, status) \
+ do { \
+ curandStatus_t err = (call); \
+ if (err != CURAND_STATUS_SUCCESS) \
+ { \
+ NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
+ return; \
+ } \
+ cudaDeviceSynchronize(); \
+ } while (0)
+
+#define CURAND_SAFE_SYNC_CALL_RET(call, status) \
+ do { \
+ curandStatus_t err = (call); \
+ if (err != CURAND_STATUS_SUCCESS) \
+ { \
+ NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
+ return 0; \
+ } \
+ cudaDeviceSynchronize(); \
+ } while (0)
+
#define CHECK_SAME_DIMENSION(a, b, status) \
do { \
if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
@@ -96,15 +118,46 @@ static const char *cublasGetErrorString(cublasStatus_t err) {
return "<unknown>";
}
+static const char *curandGetErrorString(curandStatus_t err) {
+ switch (err)
+ {
+ case CURAND_STATUS_VERSION_MISMATCH:
+ return "Header file and linked library version do not match";
+ case CURAND_STATUS_NOT_INITIALIZED:
+ return "Generator not initialized";
+ case CURAND_STATUS_ALLOCATION_FAILED:
+ return "Memory allocation failed";
+ case CURAND_STATUS_TYPE_ERROR:
+ return "Generator is wrong type";
+ case CURAND_STATUS_OUT_OF_RANGE:
+ return "Argument out of range";
+ case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+ return "Length requested is not a multple of dimension";
+ case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+ return "GPU does not have double precision required by MRG32k3a";
+ case CURAND_STATUS_LAUNCH_FAILURE:
+ return "Kernel launch failure";
+ case CURAND_STATUS_PREEXISTING_FAILURE:
+ return "Preexisting failure on library entry";
+ case CURAND_STATUS_INITIALIZATION_FAILED:
+ return "Initialization of CUDA failed";
+ case CURAND_STATUS_ARCH_MISMATCH:
+ return "Architecture mismatch, GPU does not support requested feature";
+ case CURAND_STATUS_INTERNAL_ERROR:
+ return "Internal library error";
+ }
+ return "<unknown>";
+}
#define PROFILE_START \
do { \
- cudaEventRecord(profile_start, 0);
+ cudaEventRecord(context->profile_start, 0);
#define PROFILE_STOP \
- cudaEventRecord(profile_stop, 0); \
- cudaEventSynchronize(profile_stop); \
+ cudaEventRecord(context->profile_stop, 0); \
+ cudaEventSynchronize(context->profile_stop); \
float milliseconds = 0; \
- cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \
- accu_profile(__func__, milliseconds / 1000); \
+ cudaEventElapsedTime(&milliseconds, context->profile_start, \
+ context->profile_stop); \
+ nerv_cuda_context_accu_profile(context, __func__, milliseconds / 1000); \
} while (0);
#define PROFILE_END