aboutsummaryrefslogtreecommitdiff
path: root/nerv/lib/matrix/cumatrix.c
diff options
context:
space:
mode:
authorDeterminant <ted.sybil@gmail.com>2016-03-02 16:49:33 +0800
committerDeterminant <ted.sybil@gmail.com>2016-03-02 16:49:33 +0800
commitd3abc6459a776ff7fa3777f4f561bc4f5d5e2075 (patch)
treea0fffbdb4960ba463f720436ca5f050f0d747504 /nerv/lib/matrix/cumatrix.c
parent75108e2e000f4382129d453c0fa9073b14d32f97 (diff)
add GPU auto selection
Diffstat (limited to 'nerv/lib/matrix/cumatrix.c')
-rw-r--r--nerv/lib/matrix/cumatrix.c52
1 files changed, 46 insertions, 6 deletions
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index 537fabb..aec4d60 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -37,7 +37,9 @@ void nerv_cuda_context_accu_profile(CuContext *context,
*val += delta;
}
-static void new_cuda_handles(CuContext *context, Status *status) {
+static void new_cuda_handles(CuContext *context, int dev, Status *status) {
+ if (context->has_handle) return;
+ CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status);
CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen),
CURAND_RNG_PSEUDO_DEFAULT), status);
@@ -47,9 +49,12 @@ static void new_cuda_handles(CuContext *context, Status *status) {
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status);
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status);
NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ context->has_handle = 1;
}
static void free_cuda_handles(CuContext *context, Status *status) {
+ if (!context->has_handle) return;
+ context->has_handle = 0;
CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status);
CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status);
CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status);
@@ -57,9 +62,41 @@ static void free_cuda_handles(CuContext *context, Status *status) {
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
-CuContext *nerv_cuda_context_create(Status *status) {
+static int choose_best_gpu(Status *status) {
+ int i, n, dev = 0;
+ float best_ratio = 0;
+ fprintf(stderr, "*** select a GPU based on available space\n");
+ CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status);
+ for (i = 0; i < n; i++)
+ {
+ size_t avail, total;
+ float ratio;
+ CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status);
+ CUDA_SAFE_SYNC_CALL_RET(cuMemGetInfo(&avail, &total), status);
+ ratio = (float)avail/total * 100;
+ fprintf(stderr, "* card %d: %.2f%%\n", i, ratio);
+ if (ratio > best_ratio)
+ {
+ best_ratio = ratio;
+ dev = i;
+ }
+ CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status);
+ }
+ fprintf(stderr, "*** final decision: GPU %d\n", dev);
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ return dev;
+}
+
+CuContext *nerv_cuda_context_create(int dev, Status *status) {
CuContext *context = (CuContext *)malloc(sizeof(CuContext));
- new_cuda_handles(context, status);
+ context->has_handle = 0; /* this line must come first */
+ if (dev == -1)
+ {
+ dev = choose_best_gpu(status);
+ if (status->err_code != NERV_NORMAL)
+ return NULL;
+ }
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return NULL;
context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
@@ -78,11 +115,14 @@ void nerv_cuda_context_destroy(CuContext *context, Status *status) {
void nerv_cuda_context_select_gpu(CuContext *context,
int dev, Status *status) {
- CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
- free_cuda_handles(context, status);
+ /* free_cuda_handles(context, status);
if (status->err_code != NERV_NORMAL)
return;
- new_cuda_handles(context, status);
+ */
+ /* because of cudaDeviceReset */
+ context->has_handle = 0;
+ CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status);
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return;
NERV_SET_STATUS(status, NERV_NORMAL, 0);