summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--nerv/Makefile2
-rw-r--r--nerv/lib/matrix/cumatrix.c52
-rw-r--r--nerv/lib/matrix/cumatrix.h3
-rw-r--r--nerv/matrix/cumatrix.c3
-rw-r--r--nerv/nerv9
5 files changed, 56 insertions, 13 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index 9ef3212..46e79a0 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -42,7 +42,7 @@ INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
-CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
+CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcuda -lcublas -lcurand
NVCC := $(CUDA_BASE)/bin/nvcc
EMPTY :=
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index 537fabb..aec4d60 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -37,7 +37,9 @@ void nerv_cuda_context_accu_profile(CuContext *context,
*val += delta;
}
-static void new_cuda_handles(CuContext *context, Status *status) {
+static void new_cuda_handles(CuContext *context, int dev, Status *status) {
+ if (context->has_handle) return;
+ CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status);
CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen),
CURAND_RNG_PSEUDO_DEFAULT), status);
@@ -47,9 +49,12 @@ static void new_cuda_handles(CuContext *context, Status *status) {
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status);
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status);
NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ context->has_handle = 1;
}
static void free_cuda_handles(CuContext *context, Status *status) {
+ if (!context->has_handle) return;
+ context->has_handle = 0;
CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status);
CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status);
CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status);
@@ -57,9 +62,41 @@ static void free_cuda_handles(CuContext *context, Status *status) {
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
-CuContext *nerv_cuda_context_create(Status *status) {
+static int choose_best_gpu(Status *status) {
+ int i, n, dev = 0;
+ float best_ratio = 0;
+ fprintf(stderr, "*** select a GPU based on available space\n");
+ CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status);
+ for (i = 0; i < n; i++)
+ {
+ size_t avail, total;
+ float ratio;
+ CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status);
+ CUDA_SAFE_SYNC_CALL_RET(cuMemGetInfo(&avail, &total), status);
+ ratio = (float)avail/total * 100;
+ fprintf(stderr, "* card %d: %.2f%%\n", i, ratio);
+ if (ratio > best_ratio)
+ {
+ best_ratio = ratio;
+ dev = i;
+ }
+ CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status);
+ }
+ fprintf(stderr, "*** final decision: GPU %d\n", dev);
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ return dev;
+}
+
+CuContext *nerv_cuda_context_create(int dev, Status *status) {
CuContext *context = (CuContext *)malloc(sizeof(CuContext));
- new_cuda_handles(context, status);
+ context->has_handle = 0; /* this line must come first */
+ if (dev == -1)
+ {
+ dev = choose_best_gpu(status);
+ if (status->err_code != NERV_NORMAL)
+ return NULL;
+ }
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return NULL;
context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
@@ -78,11 +115,14 @@ void nerv_cuda_context_destroy(CuContext *context, Status *status) {
void nerv_cuda_context_select_gpu(CuContext *context,
int dev, Status *status) {
- CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
- free_cuda_handles(context, status);
+ /* free_cuda_handles(context, status);
if (status->err_code != NERV_NORMAL)
return;
- new_cuda_handles(context, status);
+ */
+ /* because of cudaDeviceReset */
+ context->has_handle = 0;
+ CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status);
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return;
NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h
index 280035b..fd2a5ce 100644
--- a/nerv/lib/matrix/cumatrix.h
+++ b/nerv/lib/matrix/cumatrix.h
@@ -5,6 +5,7 @@
#include "cuda_helper.h"
typedef struct CuContext {
+ int has_handle;
cublasHandle_t cublas_handle;
cudaEvent_t profile_start, profile_stop;
curandGenerator_t curand_gen;
@@ -15,6 +16,6 @@ void nerv_cuda_context_print_profile(CuContext *context);
void nerv_cuda_context_clear_profile(CuContext *context);
void nerv_cuda_context_accu_profile(CuContext *context, const char *name, float delta);
void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status);
-CuContext *nerv_cuda_context_create(Status *status);
+CuContext *nerv_cuda_context_create(int dev, Status *status);
void nerv_cuda_context_destroy(CuContext *contex, Status *status);
#endif
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index 7d10895..b8eef9c 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -26,7 +26,8 @@ int nerv_cuda_context_lua_clear_profile(lua_State *L) {
int nerv_cuda_context_lua_new(lua_State *L) {
Status status;
- CuContext *self = nerv_cuda_context_create(&status);
+ int dev = lua_gettop(L) > 0 ? luaL_checkinteger(L, 1) : -1;
+ CuContext *self = nerv_cuda_context_create(dev, &status);
NERV_LUA_CHECK_STATUS(L, status);
luaT_pushudata(L, self, nerv_cuda_context_tname);
return 1;
diff --git a/nerv/nerv b/nerv/nerv
index 9295290..0b75a9b 100644
--- a/nerv/nerv
+++ b/nerv/nerv
@@ -24,15 +24,16 @@ local function _add_profile_method(cls)
end
if not opts["use-cpu"].val then
+ local dev = -1
+ if opts["select-gpu"].val then
+ dev = opts["select-gpu"].val
+ end
nerv.info("automatically initialize a default CuContext...")
- nerv.CuMatrix._default_context = nerv.CuContext()
+ nerv.CuMatrix._default_context = nerv.CuContext(dev)
nerv.info("the default CuContext is ok")
_add_profile_method(nerv.CuMatrix)
nerv.CuMatrix.select_gpu =
function (dev) nerv.CuMatrix._default_context:select_gpu(dev) end
- if opts["select-gpu"].val then
- nerv.CuMatrix.select_gpu(opts["select-gpu"].val)
- end
end
nerv.info("automatically initialize a default MContext...")