1 files changed, 49 insertions, 11 deletions
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h
index 13d5728..2d18486 100644
--- a/nerv/lib/matrix/cuda_helper.h
+++ b/nerv/lib/matrix/cuda_helper.h
@@ -54,19 +54,26 @@
 cudaDeviceSynchronize(); \
 } while (0)
 
-#define CHECK_SAME_DIMENSION(a, b, status) \
+#define CURAND_SAFE_SYNC_CALL(call, status) \
 do { \
- if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
- NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); \
+ curandStatus_t err = (call); \
+ if (err != CURAND_STATUS_SUCCESS) \
+ { \
+ NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
+ return; \
+ } \
+ cudaDeviceSynchronize(); \
 } while (0)
 
-#define CHECK_SAME_DIMENSION_RET(a, b, status) \
+#define CURAND_SAFE_SYNC_CALL_RET(call, status) \
 do { \
- if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
+ curandStatus_t err = (call); \
+ if (err != CURAND_STATUS_SUCCESS) \
 { \
- NERV_SET_STATUS(status, MAT_MISMATCH_DIM, 0); \
+ NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
 return 0; \
 } \
+ cudaDeviceSynchronize(); \
 } while (0)
 
 static const char *cublasGetErrorString(cublasStatus_t err) {
@@ -96,15 +103,46 @@ static const char *cublasGetErrorString(cublasStatus_t err) {
 return "<unknown>";
 }
 
+static const char *curandGetErrorString(curandStatus_t err) {
+ switch (err)
+ {
+ case CURAND_STATUS_VERSION_MISMATCH:
+ return "Header file and linked library version do not match";
+ case CURAND_STATUS_NOT_INITIALIZED:
+ return "Generator not initialized";
+ case CURAND_STATUS_ALLOCATION_FAILED:
+ return "Memory allocation failed";
+ case CURAND_STATUS_TYPE_ERROR:
+ return "Generator is wrong type";
+ case CURAND_STATUS_OUT_OF_RANGE:
+ return "Argument out of range";
+ case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+ return "Length requested is not a multple of dimension";
+ case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+ return "GPU does not have double precision required by MRG32k3a";
+ case CURAND_STATUS_LAUNCH_FAILURE:
+ return "Kernel launch failure";
+ case CURAND_STATUS_PREEXISTING_FAILURE:
+ return "Preexisting failure on library entry";
+ case CURAND_STATUS_INITIALIZATION_FAILED:
+ return "Initialization of CUDA failed";
+ case CURAND_STATUS_ARCH_MISMATCH:
+ return "Architecture mismatch, GPU does not support requested feature";
+ case CURAND_STATUS_INTERNAL_ERROR:
+ return "Internal library error";
+ }
+ return "<unknown>";
+}
 #define PROFILE_START \
 do { \
- cudaEventRecord(profile_start, 0);
+ cudaEventRecord(context->profile_start, 0);
 #define PROFILE_STOP \
- cudaEventRecord(profile_stop, 0); \
- cudaEventSynchronize(profile_stop); \
+ cudaEventRecord(context->profile_stop, 0); \
+ cudaEventSynchronize(context->profile_stop); \
 float milliseconds = 0; \
- cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \
- accu_profile(__func__, milliseconds / 1000); \
+ cudaEventElapsedTime(&milliseconds, context->profile_start, \
+ context->profile_stop); \
+ nerv_cuda_context_accu_profile(context, __func__, milliseconds / 1000); \
 } while (0);
 
 #define PROFILE_END