aboutsummaryrefslogtreecommitdiff
path: root/nerv/lib/matrix/cuda_helper.h
blob: 5c75e38f6a590b40c01c318a75f457f50ac1b831 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#ifndef NERV_CUDA_HELPER_H
#define NERV_CUDA_HELPER_H
#include "cuda.h"
#include "cuda_runtime.h"
#include "driver_types.h"
#include "cublas_v2.h"
#include "curand.h"

#define CUBLAS_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        cublasStatus_t  err = (call); \
        if (err != CUBLAS_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, cublasGetErrorString(err)); \
            return 0; \
        } \
        cudaDeviceSynchronize(); \
    } while (0)

#define CUBLAS_SAFE_SYNC_CALL(call, status) \
    do { \
        cublasStatus_t  err = (call); \
        if (err != CUBLAS_STATUS_SUCCESS) \
            NERV_EXIT_STATUS(status, MAT_CUBLAS_ERR, cublasGetErrorString(err)); \
        cudaDeviceSynchronize(); \
    } while (0)

#define CUDA_SAFE_CALL_RET(call, status) \
    do { \
        cudaError_t err = (call); \
        if (err != cudaSuccess) \
        { \
            NERV_SET_STATUS(status, MAT_CUDA_ERR, cudaGetErrorString(err)); \
            return 0; \
        } \
    } while (0)

#define CUDA_SAFE_CALL(call, status) \
    do { \
        cudaError_t err = (call); \
        if (err != cudaSuccess) \
            NERV_EXIT_STATUS(status, MAT_CUDA_ERR, cudaGetErrorString(err)); \
    } while (0)

#define CUDA_SAFE_SYNC_CALL(call, status) \
    do { \
        CUDA_SAFE_CALL(call, status); \
        cudaDeviceSynchronize(); \
    } while (0)

#define CUDA_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        CUDA_SAFE_CALL_RET(call, status); \
        cudaDeviceSynchronize(); \
    } while (0)

#define CURAND_SAFE_SYNC_CALL(call, status) \
    do { \
        curandStatus_t  err = (call); \
        if (err != CURAND_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
            return; \
        } \
        cudaDeviceSynchronize(); \
    } while (0)

#define CURAND_SAFE_SYNC_CALL_RET(call, status) \
    do { \
        curandStatus_t  err = (call); \
        if (err != CURAND_STATUS_SUCCESS) \
        { \
            NERV_SET_STATUS(status, MAT_CUBLAS_ERR, curandGetErrorString(err)); \
            return 0; \
        } \
        cudaDeviceSynchronize(); \
    } while (0)

#define CHECK_SAME_DIMENSION(a, b, status) \
    do { \
        if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
            NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); \
    } while (0)

#define CHECK_SAME_DIMENSION_RET(a, b, status) \
    do { \
        if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
        { \
            NERV_SET_STATUS(status, MAT_MISMATCH_DIM, 0); \
            return 0; \
        } \
    } while (0)

static const char *cublasGetErrorString(cublasStatus_t err) {
    switch (err)
    {
        case CUBLAS_STATUS_SUCCESS:
            return "CUBLAS_STATUS_SUCCESS";
        case CUBLAS_STATUS_NOT_INITIALIZED:
            return "CUBLAS_STATUS_NOT_INITIALIZED";
        case CUBLAS_STATUS_ALLOC_FAILED:
            return "CUBLAS_STATUS_ALLOC_FAILED";
        case CUBLAS_STATUS_INVALID_VALUE:
            return "CUBLAS_STATUS_INVALID_VALUE";
        case CUBLAS_STATUS_ARCH_MISMATCH:
            return "CUBLAS_STATUS_ARCH_MISMATCH";
        case CUBLAS_STATUS_MAPPING_ERROR:
            return "CUBLAS_STATUS_MAPPING_ERROR";
        case CUBLAS_STATUS_EXECUTION_FAILED:
            return "CUBLAS_STATUS_EXECUTION_FAILED";
        case CUBLAS_STATUS_INTERNAL_ERROR:
            return "CUBLAS_STATUS_INTERNAL_ERROR";
/*        case CUBLAS_STATUS_NOT_SUPPORTED:
            return "CUBLAS_STATUS_NOT_SUPPORTED";
        case CUBLAS_STATUS_LICENSE_ERROR:
            return "CUBLAS_STATUS_LICENSE_ERROR"; */
    }
    return "<unknown>";
}

static const char *curandGetErrorString(curandStatus_t err) {
    switch (err)
    {
        case CURAND_STATUS_VERSION_MISMATCH:
            return "Header file and linked library version do not match";
        case CURAND_STATUS_NOT_INITIALIZED:
            return "Generator not initialized";
        case CURAND_STATUS_ALLOCATION_FAILED:
            return "Memory allocation failed";
        case CURAND_STATUS_TYPE_ERROR:
            return "Generator is wrong type";
        case CURAND_STATUS_OUT_OF_RANGE:
            return "Argument out of range";
        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
            return "Length requested is not a multple of dimension";
        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
            return "GPU does not have double precision required by MRG32k3a";
        case CURAND_STATUS_LAUNCH_FAILURE:
            return "Kernel launch failure";
        case CURAND_STATUS_PREEXISTING_FAILURE:
            return "Preexisting failure on library entry";
        case CURAND_STATUS_INITIALIZATION_FAILED:
            return "Initialization of CUDA failed";
        case CURAND_STATUS_ARCH_MISMATCH:
            return "Architecture mismatch, GPU does not support requested feature";
        case CURAND_STATUS_INTERNAL_ERROR:
            return "Internal library error";
    }
    return "<unknown>";
}
#define PROFILE_START \
    do { \
        cudaEventRecord(context->profile_start, 0);
#define PROFILE_STOP \
        cudaEventRecord(context->profile_stop, 0); \
        cudaEventSynchronize(context->profile_stop); \
        float milliseconds = 0; \
        cudaEventElapsedTime(&milliseconds, context->profile_start, \
                                            context->profile_stop); \
        nerv_cuda_context_accu_profile(context, __func__, milliseconds / 1000); \
    } while (0);

#define PROFILE_END
#endif