diff options
-rw-r--r-- | nerv/Makefile | 2 | ||||
-rw-r--r-- | nerv/io/sgd_buffer.lua | 4 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cumatrix.c | 2 |
3 files changed, 6 insertions, 2 deletions
diff --git a/nerv/Makefile b/nerv/Makefile index 728d010..0b433d5 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ io/sgd_buffer.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK -CUDA_BASE := /usr/local/cuda-6.5 +CUDA_BASE := /usr/local/cuda-7.0 #CUDA_BASE := /usr/local/cuda-5.0 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index f4f7dfe..604fa07 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -41,7 +41,7 @@ function SGDBuffer:saturate() buff.data:copy_from(buff.leftover, 0, lrow) buff.leftover = nil end - nerv.printf("leftover: %d\n", lrow) + nerv.printf("buffer leftover: %d\n", lrow) reader.tail = lrow reader.has_leftover = false end @@ -87,9 +87,11 @@ end function SGDBuffer:get_data() local batch_size = self.gconf.batch_size if self.head >= self.tail then -- buffer is empty + local t = os.clock() if not self:saturate() then return nil -- the remaining data cannot build a batch end + nerv.info("%.3fs to fill the buffer\n", os.clock() - t) end if self.head + batch_size > self.tail then return nil -- the remaining data cannot build a batch diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 40a0030..2cb3563 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -321,6 +321,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); if (a->ncol != b->ncol) NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + PROFILE_START cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow); for (i = 0; i < nrow; i++) { @@ -339,6 +340,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]), status); } free(streams); + PROFILE_STOP NERV_SET_STATUS(status, NERV_NORMAL, 0); } |