diff options
Diffstat (limited to 'nerv/lib/matrix/cuda_helper.h')
-rw-r--r-- | nerv/lib/matrix/cuda_helper.h | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h new file mode 100644 index 0000000..8041efb --- /dev/null +++ b/nerv/lib/matrix/cuda_helper.h @@ -0,0 +1,110 @@ +#ifndef NERV_CUDA_HELPER_H +#define NERV_CUDA_HELPER_H +#include "cuda.h" +#include "cuda_runtime.h" +#include "driver_types.h" +#include "cublas_v2.h" + +#define CUBLAS_SAFE_SYNC_CALL_RET(call, status) \ + do { \ + cublasStatus_t err = (call); \ + if (err != CUBLAS_STATUS_SUCCESS) \ + { \ + NERV_SET_STATUS(status, MAT_CUBLAS_ERR, cublasGetErrorString(err)); \ + return 0; \ + } \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CUBLAS_SAFE_SYNC_CALL(call, status) \ + do { \ + cublasStatus_t err = (call); \ + if (err != CUBLAS_STATUS_SUCCESS) \ + NERV_EXIT_STATUS(status, MAT_CUBLAS_ERR, cublasGetErrorString(err)); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CUDA_SAFE_CALL_RET(call, status) \ + do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) \ + { \ + NERV_SET_STATUS(status, MAT_CUDA_ERR, cudaGetErrorString(err)); \ + return 0; \ + } \ + } while (0) + +#define CUDA_SAFE_CALL(call, status) \ + do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) \ + NERV_EXIT_STATUS(status, MAT_CUDA_ERR, cudaGetErrorString(err)); \ + } while (0) + +#define CUDA_SAFE_SYNC_CALL(call, status) \ + do { \ + CUDA_SAFE_CALL(call, status); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CUDA_SAFE_SYNC_CALL_RET(call, status) \ + do { \ + CUDA_SAFE_CALL_RET(call, status); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CHECK_SAME_DIMENSION(a, b, status) \ + do { \ + if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); \ + } while (0) + +#define CHECK_SAME_DIMENSION_RET(a, b, status) \ + do { \ + if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ + { \ + NERV_SET_STATUS(status, MAT_MISMATCH_DIM, 0); \ + return 0; \ + } \ + } while (0) + +static const char *cublasGetErrorString(cublasStatus_t err) { + switch (err) + { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; +/* case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; */ + } + return "<unknown>"; +} + +#define PROFILE_START \ + do { \ + cudaEventRecord(profile_start, 0); +#define PROFILE_STOP \ + cudaEventRecord(profile_stop, 0); \ + cudaEventSynchronize(profile_stop); \ + float milliseconds = 0; \ + cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \ + accu_profile(__func__, milliseconds / 1000); \ + } while (0); + +#define PROFILE_END +#endif |