From 4b3e8591816e553a4409f5fa95f5983e59ff711f Mon Sep 17 00:00:00 2001
From: Determinant
Date: Fri, 14 Aug 2015 15:02:34 +0800
Subject: add profiling for copy_rows_fromh_by_idx

---
 nerv/Makefile                      | 2 +-
 nerv/io/sgd_buffer.lua             | 4 +++-
 nerv/lib/matrix/generic/cumatrix.c | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/nerv/Makefile b/nerv/Makefile
index 728d010..0b433d5 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
         io/sgd_buffer.lua
 
 INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-CUDA_BASE := /usr/local/cuda-6.5
+CUDA_BASE := /usr/local/cuda-7.0
 #CUDA_BASE := /usr/local/cuda-5.0
 CUDA_INCLUDE := -I $(CUDA_BASE)/include/
 INCLUDE += $(CUDA_INCLUDE)

diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index f4f7dfe..604fa07 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -41,7 +41,7 @@ function SGDBuffer:saturate()
             buff.data:copy_from(buff.leftover, 0, lrow)
             buff.leftover = nil
         end
-        nerv.printf("leftover: %d\n", lrow)
+        nerv.printf("buffer leftover: %d\n", lrow)
         reader.tail = lrow
         reader.has_leftover = false
     end
@@ -87,9 +87,11 @@ end
 function SGDBuffer:get_data()
     local batch_size = self.gconf.batch_size
     if self.head >= self.tail then -- buffer is empty
+        local t = os.clock()
         if not self:saturate() then
             return nil -- the remaining data cannot build a batch
         end
+        nerv.info("%.3fs to fill the buffer\n", os.clock() - t)
     end
     if self.head + batch_size > self.tail then
         return nil -- the remaining data cannot build a batch

diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 40a0030..2cb3563 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -321,6 +321,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
         NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0);
     if (a->ncol != b->ncol)
         NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+    PROFILE_START
     cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow);
     for (i = 0; i < nrow; i++)
     {
@@ -339,6 +340,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
         CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]), status);
     }
     free(streams);
+    PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
-- 
cgit v1.2.3


From 6cad1b1947fb2ba237b0e843cb7900cdc1653294 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Fri, 14 Aug 2015 15:22:07 +0800
Subject: use default cuda library path

---
 nerv/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nerv/Makefile b/nerv/Makefile
index 0b433d5..b5d26bd 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,8 +36,8 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
         io/sgd_buffer.lua
 
 INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-CUDA_BASE := /usr/local/cuda-7.0
-#CUDA_BASE := /usr/local/cuda-5.0
+#CUDA_BASE := /usr/local/cuda-7.0
+CUDA_BASE := /usr/local/cuda
 CUDA_INCLUDE := -I $(CUDA_BASE)/include/
 INCLUDE += $(CUDA_INCLUDE)
 
-- 
cgit v1.2.3


From 47dba09eeba2463a804e89c9d0aed7b30cc92b4e Mon Sep 17 00:00:00 2001
From: Determinant
Date: Tue, 25 Aug 2015 11:38:57 +0800
Subject: use more general implementation for mat:create

---
 nerv/matrix/generic/cumatrix.c | 10 ----------
 nerv/matrix/init.lua           |  4 ++++
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index 4bdf5f0..ab7f7c4 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -43,15 +43,6 @@ static int nerv_matrix_(lua_mul)(lua_State *L) {
     return 0;
 }
 
-static int nerv_matrix_(lua_create)(lua_State *L) {
-    Status status;
-    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
-    Matrix *b = nerv_matrix_(create)(a->nrow, a->ncol, &status);
-    NERV_LUA_CHECK_STATUS(L, status);
-    luaT_pushudata(L, b, nerv_matrix_(tname));
-    return 1;
-}
-
 static int nerv_matrix_(lua_sigmoid)(lua_State *L) {
     Status status;
     Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -289,7 +280,6 @@ static int nerv_matrix_(lua_scale_rows_by_row)(lua_State *L) {
 }
 
 static const luaL_Reg nerv_matrix_(extra_methods)[] = {
-    {"create", nerv_matrix_(lua_create)},
     {"colsum", nerv_matrix_(lua_colsum)},
     {"colsame", nerv_matrix_(lua_colsame)},
     {"rowsum", nerv_matrix_(lua_rowsum)},

diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua
index f230e9f..ae9b884 100644
--- a/nerv/matrix/init.lua
+++ b/nerv/matrix/init.lua
@@ -45,6 +45,10 @@ function nerv.Matrix:generate(gen)
     end
 end
 
+function nerv.Matrix:create()
+    return self.__constructor(self:nrow(), self:ncol())
+end
+
 nerv.MMatrixInt.fmt = "%d "
 
 function nerv.CuMatrix:__add__(b)
-- 
cgit v1.2.3


From ed2a4148dbb9c18f428571b3e2970d7b2adfb058 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Tue, 25 Aug 2015 11:47:47 +0800
Subject: add optional parameters to mat:create

---
 nerv/matrix/init.lua | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua
index ae9b884..1091d7e 100644
--- a/nerv/matrix/init.lua
+++ b/nerv/matrix/init.lua
@@ -45,8 +45,8 @@ function nerv.Matrix:generate(gen)
     end
 end
 
-function nerv.Matrix:create()
-    return self.__constructor(self:nrow(), self:ncol())
+function nerv.Matrix:create(nrow, ncol)
+    return self.__constructor(nrow or self:nrow(), ncol or self:ncol())
 end
 
 nerv.MMatrixInt.fmt = "%d "
-- 
cgit v1.2.3


From e81e9832ec4f2ad031fd42b5018cea134e8cda7e Mon Sep 17 00:00:00 2001
From: Determinant
Date: Wed, 26 Aug 2015 14:26:54 +0800
Subject: move global_transf to asr_trainer.lua

---
 nerv/examples/asr_trainer.lua        | 23 +++++++++++++++++++----
 nerv/examples/swb_baseline.lua       |  7 ++++---
 nerv/examples/swb_baseline_basic.lua |  7 ++++---
 nerv/io/sgd_buffer.lua               |  2 +-
 nerv/layer/mse.lua                   |  2 +-
 nerv/nn/layer_dag.lua                | 27 +++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index dcadfa3..5a50542 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -3,6 +3,7 @@ function build_trainer(ifname)
     param_repo:import(ifname, nil, gconf)
     local layer_repo = make_layer_repo(param_repo)
     local network = get_network(layer_repo)
+    local global_transf = get_global_transf(layer_repo)
     local input_order = get_input_order()
     local iterative_trainer = function (prefix, scp_file, bp)
         gconf.randomize = bp
@@ -24,15 +25,29 @@ function build_trainer(ifname)
 --            break
             end
             local input = {}
---            if gconf.cnt == 100 then break end
-            for i, id in ipairs(input_order) do
+--            if gconf.cnt == 1000 then break end
+            for i, e in ipairs(input_order) do
+                local id = e.id
                 if data[id] == nil then
                     nerv.error("input data %s not found", id)
                 end
-                table.insert(input, data[id])
+                local transformed
+                if e.global_transf then
+                    transformed = nerv.speech_utils.global_transf(data[id],
+                                    global_transf,
+                                    gconf.frm_ext or 0,
+                                    gconf.frm_trim or 0,
+                                    gconf)
+                else
+                    transformed = data[id]
+                end
+                table.insert(input, transformed)
             end
             local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)}
-            err_output = {input[1]:create()}
+            err_output = {}
+            for i = 1, #input do
+                table.insert(err_output, input[i]:create())
+            end
             network:propagate(input, output)
             if bp then
                 network:back_propagate(err_input, err_output, input, output)

diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua
index 0e9f897..bbc6467 100644
--- a/nerv/examples/swb_baseline.lua
+++ b/nerv/examples/swb_baseline.lua
@@ -3,6 +3,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
         cumat_type = nerv.CuMatrixFloat,
         mmat_type = nerv.MMatrixFloat,
         frm_ext = 5,
+        frm_trim = 5,
         tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
         cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
         htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
@@ -161,8 +162,7 @@ function make_readers(scp_file, layer_repo)
                         dir = "*/",
                         ext = "lab"
                     }
-                },
-                global_transf = layer_repo:get_layer("global_transf")
+                }
             }),
             data = {main_scp = 429, phone_state = 1}}
     }
@@ -178,7 +178,8 @@ function make_buffer(readers)
 end
 
 function get_input_order()
-    return {"main_scp", "phone_state"}
+    return {{id = "main_scp", global_transf = true},
+            {id = "phone_state"}}
 end
 
 function get_accuracy(layer_repo)

diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua
index c47ec3e..71f04a3 100644
--- a/nerv/examples/swb_baseline_basic.lua
+++ b/nerv/examples/swb_baseline_basic.lua
@@ -3,6 +3,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
         cumat_type = nerv.CuMatrixFloat,
         mmat_type = nerv.MMatrixFloat,
         frm_ext = 5,
+        frm_trim = 5,
         tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
         cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
         htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
@@ -124,8 +125,7 @@ function make_readers(scp_file, layer_repo)
                         dir = "*/",
                         ext = "lab"
                     }
-                },
-                global_transf = layer_repo:get_layer("global_transf")
+                }
             }),
             data = {main_scp = 429, phone_state = 1}}
     }
@@ -141,7 +141,8 @@ function make_buffer(readers)
 end
 
 function get_input_order()
-    return {"main_scp", "phone_state"}
+    return {{id = "main_scp", global_transf = true},
+            {id = "phone_state"}}
 end
 
 function get_accuracy(layer_repo)

diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index 604fa07..f9d281c 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -91,7 +91,7 @@ function SGDBuffer:get_data()
         if not self:saturate() then
             return nil -- the remaining data cannot build a batch
         end
-        nerv.info("%.3fs to fill the buffer\n", os.clock() - t)
+        nerv.info("%.3fs to fill the buffer", os.clock() - t)
     end
     if self.head + batch_size > self.tail then
         return nil -- the remaining data cannot build a batch

diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 9a97add..2516998 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -34,7 +34,7 @@ function MSELayer:propagate(input, output)
     if output[1] ~= nil then
         output[1]:copy_fromd(mse_sum)
     end
-    self.total_mse = self.total_mse + mse_sum:colsum()[0]
+    self.total_mse = self.total_mse + mse_sum:colsum()[0][0]
     self.total_frames = self.total_frames + mse_sum:nrow()
 end
 
diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua
index e9d4d86..25297c2 100644
--- a/nerv/nn/layer_dag.lua
+++ b/nerv/nn/layer_dag.lua
@@ -254,3 +254,30 @@ function DAGLayer:get_params()
     end
     return nerv.ParamRepo.merge(param_repos)
 end
+
+DAGLayer.PORT_TYPES = {
+    INPUT = {},
+    OUTPUT = {},
+    ERR_INPUT = {},
+    ERR_OUTPUT = {}
+}
+
+function DAGLayer:get_intermediate(id, port_type)
+    if id == "<input>" or id == "<output>" then
+        nerv.error("an actual real layer id is expected")
+    end
+    local layer = self.layers[id]
+    if layer == nil then
+        nerv.error("layer id %s not found", id)
+    end
+    if port_type == DAGLayer.PORT_TYPES.INPUT then
+        return layer.inputs
+    elseif port_type == DAGLayer.PORT_TYPES.OUTPUT then
+        return layer.outputs
+    elseif port_type == DAGLayer.PORT_TYPES.ERR_INPUT then
+        return layer.err_inputs
+    elseif port_type == DAGLayer.PORT_TYPES.ERR_OUTPUT then
+        return layer.err_outputs
+    end
+    nerv.error("unrecognized port type")
+end
-- 
cgit v1.2.3


From e97b97e4c684e7f26064bcc0a6440ac5d6cddc47 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Wed, 26 Aug 2015 15:43:00 +0800
Subject: ...

---
 nerv/examples/asr_trainer.lua | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 5a50542..69cfeed 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -35,8 +35,7 @@ function build_trainer(ifname)
             if e.global_transf then
                 transformed = nerv.speech_utils.global_transf(data[id],
                                 global_transf,
-                                gconf.frm_ext or 0,
-                                gconf.frm_trim or 0,
+                                gconf.frm_ext or 0, 0,
                                 gconf)
             else
                 transformed = data[id]
-- 
cgit v1.2.3
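
Editor's note: a minimal usage sketch of the APIs introduced by the patches above, assuming a working nerv environment. The `net` DAGLayer instance and the layer id "affine0" are hypothetical placeholders; everything else is taken from the diffs.

-- mat:create() (commits 47dba09 and ed2a414): clone the shape of an
-- existing matrix, optionally overriding either dimension.
local a = nerv.CuMatrixFloat(4, 5)
local b = a:create()        -- new matrix of the same type and shape (4x5)
local c = a:create(2)       -- override nrow only: 2x5
local d = a:create(2, 3)    -- override both dimensions: 2x3

-- get_input_order() (commit e81e983): entries are now tables; setting
-- global_transf = true routes that input through the global transform in
-- asr_trainer.lua instead of inside the reader.
function get_input_order()
    return {{id = "main_scp", global_transf = true},
            {id = "phone_state"}}
end

-- DAGLayer:get_intermediate() (commit e81e983): inspect the matrices bound
-- to a layer's ports; `net` is a hypothetical nerv.DAGLayer instance.
local outputs = net:get_intermediate("affine0", nerv.DAGLayer.PORT_TYPES.OUTPUT)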