summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Determinant <[email protected]> 2015-08-14 15:02:34 +0800
committer: Determinant <[email protected]> 2015-08-14 15:02:34 +0800
commit: 4b3e8591816e553a4409f5fa95f5983e59ff711f (patch)
tree: 8b757fb63767bc1463b8df23dd1cf24949608cf8
parent: 7082ba094be6ccbf97cfaf893ea437f31ced197b (diff)
add profiling for copy_rows_fromh_by_idx
-rw-r--r-- nerv/Makefile 2
-rw-r--r-- nerv/io/sgd_buffer.lua 4
-rw-r--r-- nerv/lib/matrix/generic/cumatrix.c 2
3 files changed, 6 insertions, 2 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index 728d010..0b433d5 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
io/sgd_buffer.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-CUDA_BASE := /usr/local/cuda-6.5
+CUDA_BASE := /usr/local/cuda-7.0
#CUDA_BASE := /usr/local/cuda-5.0
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index f4f7dfe..604fa07 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -41,7 +41,7 @@ function SGDBuffer:saturate()
buff.data:copy_from(buff.leftover, 0, lrow)
buff.leftover = nil
end
- nerv.printf("leftover: %d\n", lrow)
+ nerv.printf("buffer leftover: %d\n", lrow)
reader.tail = lrow
reader.has_leftover = false
end
@@ -87,9 +87,11 @@ end
function SGDBuffer:get_data()
local batch_size = self.gconf.batch_size
if self.head >= self.tail then -- buffer is empty
+ local t = os.clock()
if not self:saturate() then
return nil -- the remaining data cannot build a batch
end
+ nerv.info("%.3fs to fill the buffer\n", os.clock() - t)
end
if self.head + batch_size > self.tail then
return nil -- the remaining data cannot build a batch
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 40a0030..2cb3563 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -321,6 +321,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0);
if (a->ncol != b->ncol)
NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+ PROFILE_START
cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow);
for (i = 0; i < nrow; i++)
{
@@ -339,6 +340,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]), status);
}
free(streams);
+ PROFILE_STOP
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}