1 files changed, 58 insertions, 5 deletions
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h
index 13d5728..5c75e38 100644
--- a/nerv/lib/matrix/cuda_helper.h
+++ b/nerv/lib/matrix/cuda_helper.h
@@ -54,6 +54,28 @@
         cudaDeviceSynchronize(); \
     } while (0)
 
+#define CURAND_SAFE_SYNC_CALL(call, status) \
+    do { \
+        curandStatus_t  err = (call); \
+        if (err != CURAND_STATUS_SUCCESS) \
+        { \
+            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
+            return; \
+        } \
+        cudaDeviceSynchronize(); \
+    } while (0)
+
+#define CURAND_SAFE_SYNC_CALL_RET(call, status) \
+    do { \
+        curandStatus_t  err = (call); \
+        if (err != CURAND_STATUS_SUCCESS) \
+        { \
+            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
+            return 0; \
+        } \
+        cudaDeviceSynchronize(); \
+    } while (0)
+
 #define CHECK_SAME_DIMENSION(a, b, status) \
     do { \
         if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
@@ -96,15 +118,46 @@ static const char *cublasGetErrorString(cublasStatus_t err) {
     return "<unknown>";
 }
 
+static const char *curandGetErrorString(curandStatus_t err) {
+    switch (err)
+    {
+        case CURAND_STATUS_VERSION_MISMATCH:
+            return "Header file and linked library version do not match";
+        case CURAND_STATUS_NOT_INITIALIZED:
+            return "Generator not initialized";
+        case CURAND_STATUS_ALLOCATION_FAILED:
+            return "Memory allocation failed";
+        case CURAND_STATUS_TYPE_ERROR:
+            return "Generator is wrong type";
+        case CURAND_STATUS_OUT_OF_RANGE:
+            return "Argument out of range";
+        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+            return "Length requested is not a multple of dimension";
+        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+            return "GPU does not have double precision required by MRG32k3a";
+        case CURAND_STATUS_LAUNCH_FAILURE:
+            return "Kernel launch failure";
+        case CURAND_STATUS_PREEXISTING_FAILURE:
+            return "Preexisting failure on library entry";
+        case CURAND_STATUS_INITIALIZATION_FAILED:
+            return "Initialization of CUDA failed";
+        case CURAND_STATUS_ARCH_MISMATCH:
+            return "Architecture mismatch, GPU does not support requested feature";
+        case CURAND_STATUS_INTERNAL_ERROR:
+            return "Internal library error";
+    }
+    return "<unknown>";
+}
 #define PROFILE_START \
     do { \
-        cudaEventRecord(profile_start, 0);
+        cudaEventRecord(context->profile_start, 0);
 #define PROFILE_STOP \
-        cudaEventRecord(profile_stop, 0); \
-        cudaEventSynchronize(profile_stop); \
+        cudaEventRecord(context->profile_stop, 0); \
+        cudaEventSynchronize(context->profile_stop); \
         float milliseconds = 0; \
-        cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \
-        accu_profile(__func__, milliseconds / 1000); \
+        cudaEventElapsedTime(&milliseconds, context->profile_start, \
+                                            context->profile_stop); \
+        nerv_cuda_context_accu_profile(context, __func__, milliseconds / 1000); \
     } while (0);
 
 #define PROFILE_END