diff options
-rw-r--r-- | nerv/Makefile | 2 | ||||
-rw-r--r-- | nerv/lib/matrix/cumatrix.c | 52 | ||||
-rw-r--r-- | nerv/lib/matrix/cumatrix.h | 3 | ||||
-rw-r--r-- | nerv/matrix/cumatrix.c | 3 | ||||
-rw-r--r-- | nerv/nerv | 9 |
5 files changed, 56 insertions, 13 deletions
diff --git a/nerv/Makefile b/nerv/Makefile index 9ef3212..46e79a0 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -42,7 +42,7 @@ INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) -CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand +CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcuda -lcublas -lcurand NVCC := $(CUDA_BASE)/bin/nvcc EMPTY := diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index 537fabb..aec4d60 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -37,7 +37,9 @@ void nerv_cuda_context_accu_profile(CuContext *context, *val += delta; } -static void new_cuda_handles(CuContext *context, Status *status) { +static void new_cuda_handles(CuContext *context, int dev, Status *status) { + if (context->has_handle) return; + CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status); CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen), CURAND_RNG_PSEUDO_DEFAULT), status); @@ -47,9 +49,12 @@ static void new_cuda_handles(CuContext *context, Status *status) { CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status); CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status); NERV_SET_STATUS(status, NERV_NORMAL, 0); + context->has_handle = 1; } static void free_cuda_handles(CuContext *context, Status *status) { + if (!context->has_handle) return; + context->has_handle = 0; CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status); CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status); CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status); @@ -57,9 +62,41 @@ static void free_cuda_handles(CuContext *context, Status *status) { NERV_SET_STATUS(status, NERV_NORMAL, 0); } -CuContext *nerv_cuda_context_create(Status *status) { +static int choose_best_gpu(Status *status) { + int i, n, dev = 0; + float best_ratio = 0; + fprintf(stderr, "*** select a GPU based on available space\n"); + CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status); + for (i = 0; i < n; i++) + { + size_t avail, total; + float ratio; + CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status); + CUDA_SAFE_SYNC_CALL_RET(cuMemGetInfo(&avail, &total), status); + ratio = (float)avail/total * 100; + fprintf(stderr, "* card %d: %.2f%%\n", i, ratio); + if (ratio > best_ratio) + { + best_ratio = ratio; + dev = i; + } + CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status); + } + fprintf(stderr, "*** final decision: GPU %d\n", dev); + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return dev; +} + +CuContext *nerv_cuda_context_create(int dev, Status *status) { CuContext *context = (CuContext *)malloc(sizeof(CuContext)); - new_cuda_handles(context, status); + context->has_handle = 0; /* this line must come first */ + if (dev == -1) + { + dev = choose_best_gpu(status); + if (status->err_code != NERV_NORMAL) + return NULL; + } + new_cuda_handles(context, dev, status); if (status->err_code != NERV_NORMAL) return NULL; context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp); @@ -78,11 +115,14 @@ void nerv_cuda_context_destroy(CuContext *context, Status *status) { void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status) { - CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); - free_cuda_handles(context, status); + /* free_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return; - new_cuda_handles(context, status); + */ + /* because of cudaDeviceReset */ + context->has_handle = 0; + CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status); + new_cuda_handles(context, dev, status); if (status->err_code != NERV_NORMAL) return; NERV_SET_STATUS(status, NERV_NORMAL, 0); diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h index 280035b..fd2a5ce 100644 --- a/nerv/lib/matrix/cumatrix.h +++ b/nerv/lib/matrix/cumatrix.h @@ -5,6 +5,7 @@ #include "cuda_helper.h" typedef struct CuContext { + int has_handle; cublasHandle_t cublas_handle; cudaEvent_t profile_start, profile_stop; curandGenerator_t curand_gen; @@ -15,6 +16,6 @@ void nerv_cuda_context_print_profile(CuContext *context); void nerv_cuda_context_clear_profile(CuContext *context); void nerv_cuda_context_accu_profile(CuContext *context, const char *name, float delta); void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status); -CuContext *nerv_cuda_context_create(Status *status); +CuContext *nerv_cuda_context_create(int dev, Status *status); void nerv_cuda_context_destroy(CuContext *contex, Status *status); #endif diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c index 7d10895..b8eef9c 100644 --- a/nerv/matrix/cumatrix.c +++ b/nerv/matrix/cumatrix.c @@ -26,7 +26,8 @@ int nerv_cuda_context_lua_clear_profile(lua_State *L) { int nerv_cuda_context_lua_new(lua_State *L) { Status status; - CuContext *self = nerv_cuda_context_create(&status); + int dev = lua_gettop(L) > 0 ? luaL_checkinteger(L, 1) : -1; + CuContext *self = nerv_cuda_context_create(dev, &status); NERV_LUA_CHECK_STATUS(L, status); luaT_pushudata(L, self, nerv_cuda_context_tname); return 1; @@ -24,15 +24,16 @@ local function _add_profile_method(cls) end if not opts["use-cpu"].val then + local dev = -1 + if opts["select-gpu"].val then + dev = opts["select-gpu"].val + end nerv.info("automatically initialize a default CuContext...") - nerv.CuMatrix._default_context = nerv.CuContext() + nerv.CuMatrix._default_context = nerv.CuContext(dev) nerv.info("the default CuContext is ok") _add_profile_method(nerv.CuMatrix) nerv.CuMatrix.select_gpu = function (dev) nerv.CuMatrix._default_context:select_gpu(dev) end - if opts["select-gpu"].val then - nerv.CuMatrix.select_gpu(opts["select-gpu"].val) - end end nerv.info("automatically initialize a default MContext...") |