From 5b16335a903551ffef4fafa88d67146b9131a74e Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 4 Aug 2015 11:11:50 +0800 Subject: ... --- nerv/examples/asr_trainer.lua | 4 ++-- nerv/init.lua | 2 +- nerv/nerv | 10 +++++----- nerv/nn/layer_dag.lua | 6 ++++++ 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 4fa4096..8dfb2ac 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -12,7 +12,7 @@ function build_trainer(ifname) -- initialize the network network:init(gconf.batch_size) gconf.cnt = 0 - err_input = {nerv.CuMatrixFloat(256, 1)} + err_input = {nerv.CuMatrixFloat(gconf.batch_size, 1)} err_input[1]:fill(1) for data in buffer.get_data, buffer do -- prine stat periodically @@ -32,7 +32,7 @@ function build_trainer(ifname) end table.insert(input, data[id]) end - local output = {nerv.CuMatrixFloat(256, 1)} + local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)} err_output = {input[1]:create()} network:propagate(input, output) if bp then diff --git a/nerv/init.lua b/nerv/init.lua index 89010a7..183ae6d 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -1,7 +1,7 @@ require 'libnerv' function nerv.error(fmt, ...) - error(nerv.printf("[nerv] internal error: " .. fmt .. "\n", ...)) + error("[nerv] internal error: " .. fmt .. "\n", ...) end function nerv.error_method_not_implemented() diff --git a/nerv/nerv b/nerv/nerv index 7571659..e5943aa 100644 --- a/nerv/nerv +++ b/nerv/nerv @@ -1,13 +1,13 @@ #! /usr/bin/env luajit require 'nerv' print("Greetings") -if #arg < 1 then +if #args < 1 then return end -local script = arg[1] +local script = args[1] local script_arg = {} -for i = 2, #arg do - table.insert(script_arg, arg[i]) +for i = 2, #args do + table.insert(script_arg, args[i]) end -arg = script_arg +args = script_arg dofile(script) diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index 8e30216..e5c1ac7 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -177,6 +177,9 @@ end function DAGLayer:set_inputs(input) for i = 1, #self.dim_in do + if input[i] == nil then + nerv.error("some input is not provided"); + end local layer = self.inputs[i][1] local port = self.inputs[i][2] layer.inputs[port] = input[i] @@ -185,6 +188,9 @@ end function DAGLayer:set_outputs(output) for i = 1, #self.dim_out do + if output[i] == nil then + nerv.error("some output is not provided"); + end local layer = self.outputs[i][1] local port = self.outputs[i][2] layer.outputs[port] = output[i] -- cgit v1.2.3-70-g09d2 From 462d1982f299c8be4ae9a746e6ae7d04c04faa33 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 4 Aug 2015 14:41:22 +0800 Subject: add embedding_example --- embedding_example/.gitignore | 2 + embedding_example/Makefile | 9 +++ embedding_example/main.c | 95 ++++++++++++++++++++++++++ embedding_example/setup_nerv.lua | 26 +++++++ embedding_example/swb_baseline_decode.lua | 109 ++++++++++++++++++++++++++++++ nerv/Makefile | 2 +- nerv/layer/init.lua | 1 + nerv/lib/matrix/generic/matrix.c | 4 +- nerv/lib/matrix/generic/matrix.h | 2 + 9 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 embedding_example/.gitignore create mode 100644 embedding_example/Makefile create mode 100644 embedding_example/main.c create mode 100644 embedding_example/setup_nerv.lua create mode 100644 embedding_example/swb_baseline_decode.lua diff --git a/embedding_example/.gitignore b/embedding_example/.gitignore new file mode 100644 index 0000000..8e68213 --- /dev/null +++ 
b/embedding_example/.gitignore
@@ -0,0 +1,2 @@
+main
+main.o
diff --git a/embedding_example/Makefile b/embedding_example/Makefile
new file mode 100644
index 0000000..e4ee314
--- /dev/null
+++ b/embedding_example/Makefile
@@ -0,0 +1,9 @@
+CFLAG += -I ../install/include/luajit-2.0/ -I ../install/include/nerv/
+LDFLAG += -L../install/lib/ -lluajit-5.1 -Wl,-rpath=../install/lib/ -lluaT -lnervcore
+GCC := gcc
+
+main: main.o
+	$(GCC) -o $@ $< $(LDFLAG)
+
+main.o: main.c
+	$(GCC) $(CFLAG) -o $@ $< -c
diff --git a/embedding_example/main.c b/embedding_example/main.c
new file mode 100644
index 0000000..4e70892
--- /dev/null
+++ b/embedding_example/main.c
@@ -0,0 +1,95 @@
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+#include "matrix/matrix.h"
+#include "common.h"
+#include "luaT/luaT.h"
+#include <stdio.h>
+
+const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat";
+extern Matrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status);
+extern void nerv_matrix_host_float_data_retain(Matrix *self);
+extern void nerv_matrix_host_float_data_free(Matrix *self, Status *status);
+
+lua_State *L;
+Matrix *input, *output;
+Status status;
+
+void setup_nerv() {
+    L = lua_open();
+    luaL_openlibs(L);
+    luaL_loadfile(L, "setup_nerv.lua");
+    /* network configuration */
+    lua_pushstring(L, "swb_baseline_decode.lua");
+    if (lua_pcall(L, 1, LUA_MULTRET, 0))
+    {
+        printf("%s\n", luaL_checkstring(L, 1));
+        exit(1);
+    }
+    /* lua stack now: input width, output width, propagator */
+    input = nerv_matrix_host_float_create(1, luaL_checkinteger(L, 1), &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    output = nerv_matrix_host_float_create(1, luaL_checkinteger(L, 2), &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+}
+
+
+void propagate(float for_fun) {
+    int i, j;
+    printf("ok: %d\n", lua_gettop(L));
+    lua_pushvalue(L, 3);
+    /* lua stack now: input width, output width, propagator, propagator */
+    for (i = 0; i < input->nrow; i++) /* nrow is actually 1 */
+    {
+        float *nerv_row = (float *)((char *)input->data.f + i * input->stride);
+        for (j = 0; j < input->ncol; j++)
+        {
+            nerv_row[j] = j * for_fun;
+        }
+    }
+    /* avoid gc */
+    nerv_matrix_host_float_data_retain(input);
+    nerv_matrix_host_float_data_retain(input);
+    nerv_matrix_host_float_data_retain(input);
+    nerv_matrix_host_float_data_retain(input);
+    nerv_matrix_host_float_data_retain(output);
+    nerv_matrix_host_float_data_retain(output);
+    nerv_matrix_host_float_data_retain(output);
+    nerv_matrix_host_float_data_retain(output);
+
+    luaT_pushudata(L, input, nerv_matrix_host_float_tname);
+    luaT_pushudata(L, output, nerv_matrix_host_float_tname);
+    /* lua stack now: input width, output width, propagator, propagator, input, output */
+    if (lua_pcall(L, 2, 0, 0)) /* call propagator with two parameters */
+    {
+        printf("%s\n", luaL_checkstring(L, -1));
+        exit(-1);
+    }
+    /* lua stack now: input width, output width, propagator */
+    printf("## caller ##\n");
+    for (i = 0; i < output->nrow; i++) /* nrow is actually 1 */
+    {
+        float *nerv_row = (float *)((char *)output->data.f + i * output->stride);
+        for (j = 0; j < output->ncol; j++)
+        {
+            printf("%.8f ", nerv_row[j]);
+        }
+        printf("\n");
+    }
+}
+
+void teardown_nerv() {
+    nerv_matrix_host_float_data_free(input, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    nerv_matrix_host_float_data_free(output, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+}
+
+int main() {
+    setup_nerv();
+    propagate(1.0);
+    propagate(2.0);
+    propagate(3.0);
+    teardown_nerv();
+    return 0;
+}
diff --git 
a/embedding_example/setup_nerv.lua b/embedding_example/setup_nerv.lua new file mode 100644 index 0000000..e33a1e7 --- /dev/null +++ b/embedding_example/setup_nerv.lua @@ -0,0 +1,26 @@ +package.path="/home/slhome/mfy43/.luarocks/share/lua/5.1/?.lua;/home/slhome/mfy43/.luarocks/share/lua/5.1/?/init.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?/init.lua;"..package.path +package.cpath="/home/slhome/mfy43/.luarocks/lib/lua/5.1/?.so;/home/slhome/mfy43/nerv/install/lib/lua/5.1/?.so;"..package.cpath +local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1") + +local args = {...} +require 'nerv' +dofile(args[1]) +local param_repo = nerv.ParamRepo() +param_repo:import(gconf.initialized_param, nil, gconf) +local sublayer_repo = make_sublayer_repo(param_repo) +local layer_repo = make_layer_repo(sublayer_repo, param_repo) +local network = get_network(layer_repo) +local batch_size = 1 +network:init(batch_size) +function propagator(input, output) + local gpu_input = nerv.CuMatrixFloat(input:nrow(), input:ncol()) + local gpu_output = nerv.CuMatrixFloat(output:nrow(), output:ncol()) + gpu_input:copy_fromh(input) + print(gpu_input) + network:propagate({gpu_input}, {gpu_output}) + gpu_output:copy_toh(output) + print(output) + -- collect garbage in-time to save GPU memory + collectgarbage("collect") +end +return network.dim_in[1], network.dim_out[1], propagator diff --git a/embedding_example/swb_baseline_decode.lua b/embedding_example/swb_baseline_decode.lua new file mode 100644 index 0000000..14a463b --- /dev/null +++ b/embedding_example/swb_baseline_decode.lua @@ -0,0 +1,109 @@ +require 'htk_io' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", + htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", + "/slfs1/users/mfy43/swb_global_transf.nerv"}, + debug = false} + +function make_sublayer_repo(param_repo) + return nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = 
{2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxLayer"] = + { + soutput = {{}, {dim_in = {3001}, dim_out = {3001}}} + } + }, param_repo, gconf) +end + +function make_layer_repo(sublayer_repo, param_repo) + return nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }}, + main = {{}, { + dim_in = {429}, dim_out = {3001}, + sub_layers = sublayer_repo, + connections = { + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "soutput[1]", + ["soutput[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) +end + +function get_network(layer_repo) + return layer_repo:get_layer("main") +end diff --git a/nerv/Makefile b/nerv/Makefile index 022e2fb..fdffd12 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -30,7 +30,7 @@ LUAT_OBJS := $(addprefix $(OBJ_DIR)/,$(LUAT_OBJS)) OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS) LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so LUA_LIBS := matrix/init.lua io/init.lua init.lua \ - layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \ + layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua\ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 7172f99..6861b0e 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -78,3 +78,4 @@ nerv.include('window.lua') nerv.include('mse.lua') nerv.include('combiner.lua') nerv.include('affine_recurrent.lua') +nerv.include('softmax.lua') diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c index e4afa37..6cb3dc0 100644 --- a/nerv/lib/matrix/generic/matrix.c +++ b/nerv/lib/matrix/generic/matrix.c @@ -3,7 +3,7 @@ #include "matrix.h" /* FIXME: malloc failure detection */ -static void nerv_matrix_(data_free)(Matrix *self, Status *status) { +void nerv_matrix_(data_free)(Matrix *self, Status *status) { assert(*self->data_ref > 0); if (--(*self->data_ref) == 0) { @@ -18,7 +18,7 @@ static void nerv_matrix_(data_free)(Matrix *self, Status *status) { } } -static void nerv_matrix_(data_retain)(Matrix *self) { +void nerv_matrix_(data_retain)(Matrix *self) { (*self->data_ref)++; } diff --git a/nerv/lib/matrix/generic/matrix.h b/nerv/lib/matrix/generic/matrix.h index 9d44e6d..69b4e6d 100644 --- a/nerv/lib/matrix/generic/matrix.h +++ b/nerv/lib/matrix/generic/matrix.h @@ -2,3 +2,5 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status); void nerv_matrix_(destroy)(Matrix *self, Status *status); Matrix 
*nerv_matrix_(getrow)(Matrix *self, int row);
+void nerv_matrix_(data_free)(Matrix *self, Status *status);
+void nerv_matrix_(data_retain)(Matrix *self);
-- 
cgit v1.2.3-70-g09d2


From 0223b6b88620d9235fc47818aaa5c86ae81d38d9 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Tue, 4 Aug 2015 14:42:08 +0800
Subject: ...

---
 embedding_example/main.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/embedding_example/main.c b/embedding_example/main.c
index 4e70892..4c6459c 100644
--- a/embedding_example/main.c
+++ b/embedding_example/main.c
@@ -49,12 +49,6 @@ void propagate(float for_fun) {
         }
     }
     /* avoid gc */
     nerv_matrix_host_float_data_retain(input);
-    nerv_matrix_host_float_data_retain(input);
-    nerv_matrix_host_float_data_retain(input);
-    nerv_matrix_host_float_data_retain(input);
-    nerv_matrix_host_float_data_retain(output);
-    nerv_matrix_host_float_data_retain(output);
-    nerv_matrix_host_float_data_retain(output);
     nerv_matrix_host_float_data_retain(output);
 
     luaT_pushudata(L, input, nerv_matrix_host_float_tname);
-- 
cgit v1.2.3-70-g09d2


From e20b60f659b08c46b9da0591ee489803f3f3d300 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Tue, 4 Aug 2015 15:51:53 +0800
Subject: ...

---
 nerv/layer/softmax.lua | 31 +++++++++++++++++++++++++++++++
 nerv/nerv              | 10 +++++-----
 2 files changed, 36 insertions(+), 5 deletions(-)
 create mode 100644 nerv/layer/softmax.lua

diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
new file mode 100644
index 0000000..e979ebf
--- /dev/null
+++ b/nerv/layer/softmax.lua
@@ -0,0 +1,31 @@
+local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
+
+function SoftmaxLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.gconf = global_conf
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1) -- one input, one output
+end
+
+function SoftmaxLayer:init(batch_size)
+    if self.dim_in[1] ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+end
+
+function SoftmaxLayer:update(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SoftmaxLayer:propagate(input, output)
+    output[1]:softmax(input[1])
+end
+
+function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
+    nerv.error_method_not_implemented()
+end
+
+function SoftmaxLayer:get_params()
+    return nerv.ParamRepo({})
+end
diff --git a/nerv/nerv b/nerv/nerv
index e5943aa..7571659 100644
--- a/nerv/nerv
+++ b/nerv/nerv
@@ -1,13 +1,13 @@
 #! 
/usr/bin/env luajit require 'nerv' print("Greetings") -if #args < 1 then +if #arg < 1 then return end -local script = args[1] +local script = arg[1] local script_arg = {} -for i = 2, #args do - table.insert(script_arg, args[i]) +for i = 2, #arg do + table.insert(script_arg, arg[i]) end -args = script_arg +arg = script_arg dofile(script) -- cgit v1.2.3-70-g09d2 From c3effaac9e9965371a73f9c84c2a4e0880f32138 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 4 Aug 2015 17:27:09 +0800 Subject: fix gc issues --- embedding_example/Makefile | 6 ++++++ embedding_example/main.c | 28 ++++++++++++++++------------ embedding_example/run.sh | 4 ++++ embedding_example/setup_nerv.lua | 9 ++++----- 4 files changed, 30 insertions(+), 17 deletions(-) create mode 100755 embedding_example/run.sh diff --git a/embedding_example/Makefile b/embedding_example/Makefile index e4ee314..3420b30 100644 --- a/embedding_example/Makefile +++ b/embedding_example/Makefile @@ -2,6 +2,12 @@ CFLAG += -I ../install/include/luajit-2.0/ -I ../install/include/nerv/ LDFLAG += -L../install/lib/ -lluajit-5.1 -Wl,-rpath=../install/lib/ -lluaT -lnervcore GCC := gcc +.PHONY: FORCE + +FORCE: ../install/bin/luarocks + echo "#!/bin/bash" > run.sh + $< path >> run.sh + echo "./main" >> run.sh main: main.o $(GCC) -o $@ $< $(LDFLAG) diff --git a/embedding_example/main.c b/embedding_example/main.c index 4c6459c..b3c9bf2 100644 --- a/embedding_example/main.c +++ b/embedding_example/main.c @@ -7,6 +7,8 @@ #include const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat"; +const char *input_name = "_nerv_embed_input"; +const char *output_name = "_nerv_embed_output"; extern Matrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status); extern void nerv_matrix_host_float_data_retain(Matrix *self); extern void nerv_matrix_host_float_data_free(Matrix *self, Status *status); @@ -31,6 +33,11 @@ void setup_nerv() { NERV_LUA_CHECK_STATUS(L, status); output = nerv_matrix_host_float_create(1, luaL_checkinteger(L, 2), &status); NERV_LUA_CHECK_STATUS(L, status); + /* add reference to avoid gc */ + luaT_pushudata(L, output, nerv_matrix_host_float_tname); + luaT_pushudata(L, input, nerv_matrix_host_float_tname); + lua_setfield(L, LUA_GLOBALSINDEX, input_name); + lua_setfield(L, LUA_GLOBALSINDEX, output_name); } @@ -47,12 +54,8 @@ void propagate(float for_fun) { nerv_row[j] = j * for_fun; } } - /* avoid gc */ - nerv_matrix_host_float_data_retain(input); - nerv_matrix_host_float_data_retain(output); - - luaT_pushudata(L, input, nerv_matrix_host_float_tname); - luaT_pushudata(L, output, nerv_matrix_host_float_tname); + lua_getfield(L, LUA_GLOBALSINDEX, input_name); + lua_getfield(L, LUA_GLOBALSINDEX, output_name); /* lua stack now: input width, output width, propagator, propagator, input, output */ if (lua_pcall(L, 2, 0, 0)) /* call propagator with two parameters */ { @@ -60,7 +63,7 @@ void propagate(float for_fun) { exit(-1); } /* lua stack now: input width, output width, propagator */ - printf("## caller ##\n"); + printf("## output: %ld %ld ##\n", output->nrow, output->ncol); for (i = 0; i < output->nrow; i++) /* nrow is actually 1 */ { float *nerv_row = (float *)((char *)output->data.f + i * output->stride); @@ -68,21 +71,22 @@ void propagate(float for_fun) { { printf("%.8f ", nerv_row[j]); } - printf("\n"); } } void teardown_nerv() { - nerv_matrix_host_float_data_free(input, &status); - NERV_LUA_CHECK_STATUS(L, status); - nerv_matrix_host_float_data_free(output, &status); - NERV_LUA_CHECK_STATUS(L, status); + lua_pushnil(L); + 
lua_pushnil(L); + lua_setfield(L, LUA_GLOBALSINDEX, input_name); + lua_setfield(L, LUA_GLOBALSINDEX, output_name); + lua_gc(L, LUA_GCCOLLECT, 0); } int main() { setup_nerv(); propagate(1.0); propagate(2.0); + propagate(2.0); propagate(3.0); teardown_nerv(); return 0; diff --git a/embedding_example/run.sh b/embedding_example/run.sh new file mode 100755 index 0000000..e919263 --- /dev/null +++ b/embedding_example/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash +export LUA_PATH='/home/slhome/mfy43/.luarocks/share/lua/5.1/?.lua;/home/slhome/mfy43/.luarocks/share/lua/5.1/?/init.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?/init.lua;./?.lua;/usr/local/share/luajit-2.0.4/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua' +export LUA_CPATH='/home/slhome/mfy43/.luarocks/lib/lua/5.1/?.so;/home/slhome/mfy43/nerv/install/lib/lua/5.1/?.so;./?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/lib/lua/5.1/loadall.so' +./main diff --git a/embedding_example/setup_nerv.lua b/embedding_example/setup_nerv.lua index e33a1e7..3ae878d 100644 --- a/embedding_example/setup_nerv.lua +++ b/embedding_example/setup_nerv.lua @@ -1,10 +1,7 @@ -package.path="/home/slhome/mfy43/.luarocks/share/lua/5.1/?.lua;/home/slhome/mfy43/.luarocks/share/lua/5.1/?/init.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?.lua;/home/slhome/mfy43/nerv/install/share/lua/5.1/?/init.lua;"..package.path -package.cpath="/home/slhome/mfy43/.luarocks/lib/lua/5.1/?.so;/home/slhome/mfy43/nerv/install/lib/lua/5.1/?.so;"..package.cpath local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1") - -local args = {...} require 'nerv' -dofile(args[1]) +local arg = {...} +dofile(arg[1]) local param_repo = nerv.ParamRepo() param_repo:import(gconf.initialized_param, nil, gconf) local sublayer_repo = make_sublayer_repo(param_repo) @@ -12,6 +9,7 @@ local layer_repo = make_layer_repo(sublayer_repo, param_repo) local network = get_network(layer_repo) local batch_size = 1 network:init(batch_size) + function propagator(input, output) local gpu_input = nerv.CuMatrixFloat(input:nrow(), input:ncol()) local gpu_output = nerv.CuMatrixFloat(output:nrow(), output:ncol()) @@ -23,4 +21,5 @@ function propagator(input, output) -- collect garbage in-time to save GPU memory collectgarbage("collect") end + return network.dim_in[1], network.dim_out[1], propagator -- cgit v1.2.3-70-g09d2 From 6aa0bb7b1ca5c92e50d6c7eeb1e3db2c16e71c3b Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 4 Aug 2015 17:32:30 +0800 Subject: ... 
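
The gc fix in the previous commit (c3effaac) addresses a real hazard: the propagator calls collectgarbage("collect") on every invocation, so the luaT userdata pushed for each call could be collected between calls, which the old code papered over with stacked data_retain() calls. The fix instead anchors the two matrices under the globals _nerv_embed_input and _nerv_embed_output, keeping them reachable from a GC root until teardown_nerv() nils the fields and runs a final collection. A minimal sketch of the same anchoring pattern in plain Lua 5.1 (the table stand-ins are illustrative, not from the source):

```lua
-- Root an object in the globals table so an explicit collectgarbage()
-- cannot reclaim it: the Lua-side equivalent of the
-- lua_setfield(L, LUA_GLOBALSINDEX, ...) calls in main.c.
local function anchor(name, obj) _G[name] = obj end
local function release(name) _G[name] = nil end

anchor("_nerv_embed_input", {})  -- stand-in for the input matrix userdata
collectgarbage("collect")        -- anchored: survives a full collection
release("_nerv_embed_input")     -- what teardown_nerv() does via lua_pushnil
collectgarbage("collect")        -- the object is now collectable
```
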
--- embedding_example/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/embedding_example/Makefile b/embedding_example/Makefile index 3420b30..73287f4 100644 --- a/embedding_example/Makefile +++ b/embedding_example/Makefile @@ -4,6 +4,11 @@ GCC := gcc .PHONY: FORCE +all: main FORCE +clean: + -rm -f *.o + -rm main + FORCE: ../install/bin/luarocks echo "#!/bin/bash" > run.sh $< path >> run.sh -- cgit v1.2.3-70-g09d2 From e935fd6d49b81b0c83d5ad112bfd0a8a68a67175 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 08:04:40 +0800 Subject: put global transformation into a separate library --- Makefile | 1 + nerv/Makefile | 2 +- nerv/matrix/generic/cukernel.cu | 592 ---------------------------------------- speech | 2 +- 4 files changed, 3 insertions(+), 594 deletions(-) delete mode 100644 nerv/matrix/generic/cukernel.cu diff --git a/Makefile b/Makefile index fa888c3..664a83b 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ luarocks: install: cd nerv; $(PREFIX)/bin/luarocks make speech: + cd speech/speech_utils; $(PREFIX)/bin/luarocks make cd speech/htk_io; $(PREFIX)/bin/luarocks make clean: cd nerv && make clean diff --git a/nerv/Makefile b/nerv/Makefile index fdffd12..728d010 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -66,7 +66,7 @@ $(LIB_PATH)/libluaT.so: $(LUAT_OBJS) $(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so gcc -shared -o $@ $(NERV_OBJS) $(LDFLAGS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT -$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c matrix/generic/cukernel.cu +$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c $(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c $(OBJ_DIR)/lib/matrix/cumatrix.o: lib/matrix/generic/cumatrix.c lib/matrix/generic/matrix.c lib/matrix/generic/cukernel.cu diff --git a/nerv/matrix/generic/cukernel.cu b/nerv/matrix/generic/cukernel.cu deleted file mode 100644 index 2ae5e62..0000000 --- a/nerv/matrix/generic/cukernel.cu +++ /dev/null @@ -1,592 +0,0 @@ -#ifdef NERV_GENERIC_CUKERNEL -#include -#include -#include "matrix.h" -#include "cuda.h" -#include "float.h" -#define CUDA_THREADS_N 16 -#define CUDA_THREADS_NN ((CUDA_THREADS_N) * (CUDA_THREADS_N)) -#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) -__global__ void cudak_(log_elem)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, int stride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - long idx; - MATRIX_ELEM tmp; - if (i >= nrow || j >= ncol) return; - idx = j + i * stride; - tmp = a[idx]; - if(tmp < FLT_MIN) tmp = FLT_MIN; - b[idx] = log(tmp); -} - -__global__ void cudak_(mul_elem)(const MATRIX_ELEM *a, const MATRIX_ELEM *b, - MATRIX_ELEM *c, - int nrow, int ncol, int stride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - long idx; - if (i >= nrow || j >= ncol) return; - idx = j + i * stride; - c[idx] = a[idx] * b[idx]; -} - -__global__ void cudak_(sigmoid)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, int stride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - long idx; - if (i >= nrow || j >= ncol) return; - idx = j + i * stride; - b[idx] = 1.0 / (1.0 + exp(-a[idx])); -} - -__global__ void cudak_(sigmoid_grad)(const MATRIX_ELEM *output, - const MATRIX_ELEM *err, - MATRIX_ELEM *nerr, - int nrow, int ncol, int stride) { - int j = blockIdx.x * 
blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - long idx; - if (i >= nrow || j >= ncol) return; - idx = j + i * stride; - nerr[idx] = output[idx] * (1.0 - output[idx]) * err[idx]; -} - -__global__ void cudak_(softmax_final)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - const MATRIX_ELEM *max, const MATRIX_ELEM *deno, - int nrow, int ncol, int stride, int mstride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - long idx; - if (i >= nrow || j >= ncol) return; - idx = j + i * stride; - b[idx] = exp(a[idx] - max[0 + i * mstride]) / deno[0 + i * mstride]; -} - -__global__ void cudak_(block_reduce_rowsum)(const MATRIX_ELEM *input, - MATRIX_ELEM *output, - const int istride, const int ostride, - const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - int j = blockIdx.x * blockDim.x + threadIdx.x; - cudak_(arr)[threadIdx.x] = j < n ? input[j + istride * blockIdx.y] : 0; - __syncthreads(); - for (int offset = blockDim.x >> 1; offset; offset >>= 1) - { - if (threadIdx.x < offset) - cudak_(arr)[threadIdx.x] += cudak_(arr)[threadIdx.x + offset]; - __syncthreads(); - } - if (threadIdx.x == 0) - output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; -} - -__global__ void cudak_(block_reduce_colsum)(const MATRIX_ELEM *input, - MATRIX_ELEM *output, - const int istride, const int ostride, - const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - int i = blockIdx.y * blockDim.y + threadIdx.y; - cudak_(arr)[threadIdx.y] = i < n ? input[blockIdx.x + istride * i] : 0; - __syncthreads(); - for (int offset = blockDim.y >> 1; offset; offset >>= 1) - { - if (threadIdx.y < offset) - cudak_(arr)[threadIdx.y] += cudak_(arr)[threadIdx.y + offset]; - __syncthreads(); - } - if (threadIdx.y == 0) - output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; -} - -__global__ void cudak_(block_reduce_colsame)(const MATRIX_ELEM *input, - const MATRIX_ELEM *ref_input, - MATRIX_ELEM *output, - const int istride, const int ostride, - const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - int i = blockIdx.y * blockDim.y + threadIdx.y; - cudak_(arr)[threadIdx.y] = (i < n && input[blockIdx.x + istride * i] == \ - ref_input[blockIdx.x + istride * i]) ? 1.0 : 0; - __syncthreads(); - for (int offset = blockDim.y >> 1; offset; offset >>= 1) - { - if (threadIdx.y < offset) - cudak_(arr)[threadIdx.y] += cudak_(arr)[threadIdx.y + offset]; - __syncthreads(); - } - if (threadIdx.y == 0) - output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; -} - -__global__ void cudak_(block_reduce_softmax_rowsum)(const MATRIX_ELEM *input, - MATRIX_ELEM *output, - const MATRIX_ELEM *max, - const int istride, const int ostride, - const int mstride, const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - int j = blockIdx.x * blockDim.x + threadIdx.x; - cudak_(arr)[threadIdx.x] = j < n ? exp(input[j + istride * blockIdx.y] - \ - max[0 + mstride * blockIdx.y]) : 0; - __syncthreads(); - for (int offset = blockDim.x >> 1; offset; offset >>= 1) - { - if (threadIdx.x < offset) - cudak_(arr)[threadIdx.x] += cudak_(arr)[threadIdx.x + offset]; - __syncthreads(); - } - if (threadIdx.x == 0) - output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; -} - -__global__ void cudak_(block_reduce_rowmax)(const MATRIX_ELEM *input, - MATRIX_ELEM *output, - const int istride, const int ostride, - const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - int j = blockIdx.x * blockDim.x + threadIdx.x; - cudak_(arr)[threadIdx.x] = j < n ? 
input[j + istride * blockIdx.y] : -FLT_MAX; - __syncthreads(); - for (int offset = blockDim.x >> 1; offset; offset >>= 1) - { - if (threadIdx.x < offset) - { - MATRIX_ELEM l = cudak_(arr)[threadIdx.x], - r = cudak_(arr)[threadIdx.x + offset]; - if (r > l) - cudak_(arr)[threadIdx.x] = r; - } - __syncthreads(); - } - if (threadIdx.x == 0) - output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; -} - -__global__ void cudak_(block_reduce_rowmax_idx)(const MATRIX_ELEM *input, - const MATRIX_ELEM *idx_input, - MATRIX_ELEM *output, - MATRIX_ELEM *idx_output, - const int istride, const int ostride, - const int n) { - extern __shared__ MATRIX_ELEM cudak_(arr)[]; - MATRIX_ELEM *arr_val = cudak_(arr); - MATRIX_ELEM *arr_idx = arr_val + blockDim.x; - int j = blockIdx.x * blockDim.x + threadIdx.x; - arr_val[threadIdx.x] = j < n ? input[j + istride * blockIdx.y] : -FLT_MAX; - arr_idx[threadIdx.x] = j < n ? idx_input[j + istride * blockIdx.y] : 0; - __syncthreads(); - for (int offset = blockDim.x >> 1; offset; offset >>= 1) - { - if (threadIdx.x < offset) - { - MATRIX_ELEM l = arr_val[threadIdx.x], - r = arr_val[threadIdx.x + offset]; - if (r > l) - { - arr_val[threadIdx.x] = r; - arr_idx[threadIdx.x] = arr_idx[threadIdx.x + offset]; - } - } - __syncthreads(); - } - if (threadIdx.x == 0) - { - output[blockIdx.x + ostride * blockIdx.y] = arr_val[0]; - idx_output[blockIdx.x + ostride * blockIdx.y] = arr_idx[0]; - } -} - -__global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, int stride, double beta) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[j + i * stride] += beta * a[j]; -} - -__global__ void cudak_(fill)(MATRIX_ELEM *a, - int nrow, int ncol, int stride, double val) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - a[j + i * stride] = val; -} - -__global__ void cudak_(clip)(MATRIX_ELEM *a, - int nrow, int ncol, int stride, double val_1, double val_2) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - if (a[j + i * stride] > val_2) - a[j + i * stride] = val_2; - else if (a[j + i * stride] < val_1) - a[j + i * stride] = val_1; -} - -__global__ void cudak_(expand_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, - int enrow, int encol, - int stride, int estride, - int context) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - int ridx; - if (i >= enrow || j >= encol) return; - ridx = i + j / ncol - context; - if (ridx < 0) ridx = 0; - else if (ridx >= nrow) ridx = nrow - 1; - b[j + i * estride] = a[j % ncol + ridx * stride]; -} - -__global__ void cudak_(rearrange_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, - int stride, int step, int orig_dim) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[j + i * stride] = a[j / step + (j % step) * orig_dim + i * stride]; -} - -__global__ void cudak_(scale_rows_by_col)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, - int astride, int bstride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[j + i * bstride] *= a[i * astride]; -} - -__global__ void 
cudak_(scale_rows_by_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, - int stride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[j + i * stride] *= a[j]; -} - -__global__ void cudak_(decompress)(const MATRIX_ELEM *a, MATRIX_ELEM *b, - int nrow, int ncol, - int stride_a, int stride_b) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[lrintf(a[j + i * stride_a]) + i * stride_b] = 1.0; -} - -__global__ void cudak_(gen_col_idx)(MATRIX_ELEM *b, - int nrow, int ncol, int stride) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - int i = blockIdx.y * blockDim.y + threadIdx.y; - if (i >= nrow || j >= ncol) return; - b[j + i * stride] = j; -} - -extern "C" { -#include "../cukernel.h" - void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(log_elem)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, - Matrix *c) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(mul_elem)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - MATRIX_ELEM_PTR(c), - b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(sigmoid)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, - b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_sigmoid_grad)(const Matrix *output, - const Matrix *err, Matrix *nerr) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(nerr->ncol, threadsPerBlock.x), - CEIL_DIV(nerr->nrow, threadsPerBlock.y)); - cudak_(sigmoid_grad)<<>> \ - (MATRIX_ELEM_PTR(output), MATRIX_ELEM_PTR(err), - MATRIX_ELEM_PTR(nerr), - nerr->nrow, nerr->ncol, - nerr->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_rowsum)(const Matrix *a, Matrix *b) { - dim3 block(CUDA_THREADS_NN, 1); - int ncol = a->ncol; - int blocks_per_row = CEIL_DIV(ncol, block.x); - dim3 grid(blocks_per_row, a->nrow); - MATRIX_ELEM *res; - size_t stride; - cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); - cudak_(block_reduce_rowsum)<<>> \ - (MATRIX_ELEM_PTR(a), res, - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - ncol); - ncol = blocks_per_row; - assert((unsigned long)ncol <= block.x); - grid.x = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_rowsum)<<>> \ - (res, MATRIX_ELEM_PTR(b), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - ncol); - cudaStreamSynchronize(0); - cudaFree(res); - } - - void cudak_(cuda_colsame)(const Matrix *a, const Matrix *ref, Matrix *b) { - dim3 block(1, CUDA_THREADS_NN); - int nrow = a->nrow; - int blocks_per_col = CEIL_DIV(nrow, block.y); - dim3 grid(a->ncol, blocks_per_col); - MATRIX_ELEM *res; - size_t stride; - cudaMallocPitch(&res, &stride, a->ncol * 
sizeof(MATRIX_ELEM), blocks_per_col); - cudak_(block_reduce_colsame)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(ref), res, - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - nrow); - nrow = blocks_per_col; - assert((unsigned long)nrow <= block.y); - grid.y = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_colsum)<<>> \ - (res, MATRIX_ELEM_PTR(b), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - nrow); - cudaStreamSynchronize(0); - cudaFree(res); - } - - void cudak_(cuda_colsum)(const Matrix *a, Matrix *b) { - dim3 block(1, CUDA_THREADS_NN); - int nrow = a->nrow; - int blocks_per_col = CEIL_DIV(nrow, block.y); - dim3 grid(a->ncol, blocks_per_col); - MATRIX_ELEM *res; - size_t stride; - cudaMallocPitch(&res, &stride, a->ncol * sizeof(MATRIX_ELEM), blocks_per_col); - cudak_(block_reduce_colsum)<<>> \ - (MATRIX_ELEM_PTR(a), res, - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - nrow); - nrow = blocks_per_col; - assert((unsigned long)nrow <= block.y); - grid.y = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_colsum)<<>> \ - (res, MATRIX_ELEM_PTR(b), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - nrow); - cudaStreamSynchronize(0); - cudaFree(res); - } - - void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, - const Matrix *deno, Matrix *b) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(softmax_final)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - MATRIX_ELEM_PTR(max), MATRIX_ELEM_PTR(deno), - b->nrow, b->ncol, - b->stride / sizeof(MATRIX_ELEM), - max->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b) { - dim3 block(CUDA_THREADS_NN, 1); - int ncol = a->ncol; - int blocks_per_row = CEIL_DIV(ncol, block.x); - dim3 grid(blocks_per_row, a->nrow); - MATRIX_ELEM *res; - size_t stride; - assert(max->ncol == 1); - cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); - cudak_(block_reduce_softmax_rowsum) \ - <<>> \ - (MATRIX_ELEM_PTR(a), res, MATRIX_ELEM_PTR(max), - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - max->stride / sizeof(MATRIX_ELEM), - ncol); - ncol = blocks_per_row; - assert((unsigned long)ncol <= block.x); - grid.x = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_rowsum) \ - <<>> \ - (res, MATRIX_ELEM_PTR(b), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - ncol); - cudaStreamSynchronize(0); - cudaFree(res); - } - - void cudak_(cuda_rowmax)(const Matrix *a, Matrix *b) { - dim3 block(CUDA_THREADS_NN, 1); - int ncol = a->ncol; - int blocks_per_row = CEIL_DIV(ncol, block.x); - dim3 grid(blocks_per_row, a->nrow); - MATRIX_ELEM *res; - size_t stride; - cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); - cudak_(block_reduce_rowmax)<<>> \ - (MATRIX_ELEM_PTR(a), res, - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - ncol); - ncol = blocks_per_row; - assert((unsigned long)ncol <= block.x); - grid.x = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_rowmax)<<>> \ - (res, MATRIX_ELEM_PTR(b), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - ncol); - cudaStreamSynchronize(0); - cudaFree(res); - } - - void cudak_(cuda_rowmax_idx)(const Matrix *a, Matrix *b, Matrix *b_idx) { - dim3 block(CUDA_THREADS_NN, 1); - int ncol = a->ncol; - int blocks_per_row = 
CEIL_DIV(ncol, block.x); - dim3 grid(blocks_per_row, a->nrow); - MATRIX_ELEM *a_idx, *res, *res_idx; - size_t stride; - cudaMallocPitch(&a_idx, &stride, a->stride, a->nrow); - cudak_(gen_col_idx)<<>>(a_idx, a->nrow, ncol, stride / sizeof(MATRIX_ELEM)); - cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); - cudaMallocPitch(&res_idx, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); - cudaStreamSynchronize(0); - cudak_(block_reduce_rowmax_idx)<<>> \ - (MATRIX_ELEM_PTR(a), a_idx, res, res_idx, - a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), - ncol); - ncol = blocks_per_row; - assert((unsigned long)ncol <= block.x); - grid.x = 1; - cudaStreamSynchronize(0); - cudak_(block_reduce_rowmax_idx)<<>> \ - (res, res_idx, MATRIX_ELEM_PTR(b), MATRIX_ELEM_PTR(b_idx), - stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), - ncol); - cudaStreamSynchronize(0); - cudaFree(a_idx); - cudaFree(res); - cudaFree(res_idx); - } - - /* in-place calc */ - void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(add_row)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, - b->stride / sizeof(MATRIX_ELEM), beta); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_fill)(Matrix *a, double val) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), - CEIL_DIV(a->nrow, threadsPerBlock.y)); - cudak_(fill)<<>> \ - (MATRIX_ELEM_PTR(a), a->nrow, a->ncol, - a->stride / sizeof(MATRIX_ELEM), val); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_clip)(Matrix *a, double val_1, double val_2) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), - CEIL_DIV(a->nrow, threadsPerBlock.y)); - cudak_(clip)<<>> \ - (MATRIX_ELEM_PTR(a), a->nrow, a->ncol, - a->stride / sizeof(MATRIX_ELEM), val_1, val_2); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(expand_frm)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - a->nrow, a->ncol, - b->nrow, b->ncol, - a->stride / sizeof(MATRIX_ELEM), - b->stride / sizeof(MATRIX_ELEM), - context); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(rearrange_frm)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM), - step, b->ncol / step); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(scale_rows_by_col)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - b->nrow, b->ncol, - a->stride / sizeof(MATRIX_ELEM), - b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b) { - dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); - dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), - 
CEIL_DIV(b->nrow, threadsPerBlock.y)); - cudak_(scale_rows_by_row)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } - - void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) { - dim3 threadsPerBlock(1, CUDA_THREADS_NN); - dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y)); - cudak_(decompress)<<>> \ - (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), - a->nrow, a->ncol, - a->stride / sizeof(MATRIX_ELEM), - b->stride / sizeof(MATRIX_ELEM)); - cudaStreamSynchronize(0); - } -} -#endif diff --git a/speech b/speech index 08e33af..acd1bc3 160000 --- a/speech +++ b/speech @@ -1 +1 @@ -Subproject commit 08e33afa533af1f026ac271446a0c873fe0bb5cb +Subproject commit acd1bc3cf812f69a6260179b584f2a3f0e6d6b80 -- cgit v1.2.3-70-g09d2 From bca6cfa05af1dfc898bdb35f9f481048840e9217 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 08:10:24 +0800 Subject: keep up with speech repo --- speech | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech b/speech index acd1bc3..261aed4 160000 --- a/speech +++ b/speech @@ -1 +1 @@ -Subproject commit acd1bc3cf812f69a6260179b584f2a3f0e6d6b80 +Subproject commit 261aed43b863c85ecf709dcac2739c388ec491d2 -- cgit v1.2.3-70-g09d2 From 00b02761747caf09756c7dccf11a7236777cbb42 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 08:17:59 +0800 Subject: ... --- .gitmodules | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1432de9..16785f1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,10 +3,7 @@ url = http://luajit.org/git/luajit-2.0.git [submodule "nerv-speech"] path = nerv-speech - url = https://github.com/Determinant/nerv-speech.git -[submodule "speech"] - path = speech - url = https://github.com/Determinant/nerv-speech.git + url = https://github.com/Nerv-SJTU/nerv-speech.git [submodule "luarocks"] path = luarocks url = https://github.com/keplerproject/luarocks.git -- cgit v1.2.3-70-g09d2 From 30701effba795635c9dcfa86882486f1f184e243 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 08:19:03 +0800 Subject: ... --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 16785f1..afad59e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "luajit-2.0"] path = luajit-2.0 url = http://luajit.org/git/luajit-2.0.git -[submodule "nerv-speech"] +[submodule "speech"] path = nerv-speech url = https://github.com/Nerv-SJTU/nerv-speech.git [submodule "luarocks"] -- cgit v1.2.3-70-g09d2 From 7ae89059d68850e12826bc6812e4a6d521e45b53 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 08:20:18 +0800 Subject: ... 
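
This and the two .gitmodules commits above are successive fixes to one stanza: 00b02761 drops the duplicate "speech" entry and repoints the URL at the Nerv-SJTU organization, 30701eff renames the remaining entry, and this commit swaps the name/path pair into its intended form. The stanza they converge on (until ddc52b08 below drops the submodule approach entirely) is:

```
[submodule "nerv-speech"]
	path = speech
	url = https://github.com/Nerv-SJTU/nerv-speech.git
```
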
--- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index afad59e..217ded3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,8 +1,8 @@ [submodule "luajit-2.0"] path = luajit-2.0 url = http://luajit.org/git/luajit-2.0.git -[submodule "speech"] - path = nerv-speech +[submodule "nerv-speech"] + path = speech url = https://github.com/Nerv-SJTU/nerv-speech.git [submodule "luarocks"] path = luarocks -- cgit v1.2.3-70-g09d2 From 7579ff4941d7019d4e911978879ec07b62a4e523 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 5 Aug 2015 09:29:24 +0800 Subject: use expanded features and do global transf in embedding_example --- embedding_example/setup_nerv.lua | 10 ++++++---- embedding_example/swb_baseline_decode.lua | 5 +++++ speech | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/embedding_example/setup_nerv.lua b/embedding_example/setup_nerv.lua index 3ae878d..49a5dd6 100644 --- a/embedding_example/setup_nerv.lua +++ b/embedding_example/setup_nerv.lua @@ -7,17 +7,19 @@ param_repo:import(gconf.initialized_param, nil, gconf) local sublayer_repo = make_sublayer_repo(param_repo) local layer_repo = make_layer_repo(sublayer_repo, param_repo) local network = get_network(layer_repo) +local global_transf = get_global_transf(layer_repo) local batch_size = 1 network:init(batch_size) function propagator(input, output) - local gpu_input = nerv.CuMatrixFloat(input:nrow(), input:ncol()) + local transformed = nerv.speech_utils.global_transf(input, + global_transf, 0, gconf) -- preprocessing + local gpu_input = nerv.CuMatrixFloat(transformed:nrow(), transformed:ncol()) local gpu_output = nerv.CuMatrixFloat(output:nrow(), output:ncol()) - gpu_input:copy_fromh(input) - print(gpu_input) + print(transformed) + gpu_input:copy_fromh(transformed) network:propagate({gpu_input}, {gpu_output}) gpu_output:copy_toh(output) - print(output) -- collect garbage in-time to save GPU memory collectgarbage("collect") end diff --git a/embedding_example/swb_baseline_decode.lua b/embedding_example/swb_baseline_decode.lua index 14a463b..8cdb320 100644 --- a/embedding_example/swb_baseline_decode.lua +++ b/embedding_example/swb_baseline_decode.lua @@ -107,3 +107,8 @@ end function get_network(layer_repo) return layer_repo:get_layer("main") end + + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end diff --git a/speech b/speech index 261aed4..c6c6442 160000 --- a/speech +++ b/speech @@ -1 +1 @@ -Subproject commit 261aed43b863c85ecf709dcac2739c388ec491d2 +Subproject commit c6c644223dc9168befd189bf6f33243390671c99 -- cgit v1.2.3-70-g09d2 From 156e2b5d97ac7a702e655dc3a1e260c72eecaa9c Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 6 Aug 2015 08:56:18 +0800 Subject: fix typoes in nerv.error --- nerv/init.lua | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/nerv/init.lua b/nerv/init.lua index 183ae6d..9c1a5c8 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -1,15 +1,19 @@ require 'libnerv' -function nerv.error(fmt, ...) - error("[nerv] internal error: " .. fmt .. "\n", ...) -end - function nerv.error_method_not_implemented() nerv.error("method not implemented"); end +function nerv.sprintf(fmt, ...) + return string.format(fmt, ...) +end + function nerv.printf(fmt, ...) - io.write(string.format(fmt, ...)) + io.write(nerv.sprintf(fmt, ...)) +end + +function nerv.error(fmt, ...) + error(nerv.sprintf("[nerv] internal error: " .. fmt .. "\n", ...)) end function nerv.mesg_with_timestamp(fmt, ...) 
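
This refactor routes both nerv.printf and nerv.error through the new nerv.sprintf, restoring the format expansion that the earlier change in 5b16335a dropped: there, the varargs were passed to error() itself, whose second argument is interpreted as a stack level rather than a format argument, so "%d"-style placeholders were never expanded. A minimal usage sketch, assuming only the definitions above (the pcall pattern is illustrative, not from the source):

```lua
require 'nerv'  -- loads libnerv plus the helpers defined above

nerv.printf("%d frames, lrate %.2f\n", 1024, 0.8)  -- formatted write to stdout

-- nerv.error now formats *before* raising, so the expanded message survives:
local ok, msg = pcall(nerv.error, "bad dimension: %d", 42)
assert(not ok and msg:find("internal error: bad dimension: 42", 1, true))
```
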
-- cgit v1.2.3-70-g09d2 From ddc52b089a29dbe805ccbe499460b3e5d5b060c7 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 6 Aug 2015 10:25:26 +0800 Subject: do not use submodule for nerv-speech --- .gitmodules | 3 --- README.md | 4 ++-- speech | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) delete mode 160000 speech diff --git a/.gitmodules b/.gitmodules index 217ded3..9f556c5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "luajit-2.0"] path = luajit-2.0 url = http://luajit.org/git/luajit-2.0.git -[submodule "nerv-speech"] - path = speech - url = https://github.com/Nerv-SJTU/nerv-speech.git [submodule "luarocks"] path = luarocks url = https://github.com/keplerproject/luarocks.git diff --git a/README.md b/README.md index 10d531c..efec2c4 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ First make sure you have __lua__ and __CUDA__ installed on your computer. __Nerv__ is currently developed via github.You can download and make __Nerv__ by doing the following: ``` cd ~ -git clone https://github.com/Determinant/nerv.git +git clone https://github.com/Nerv-SJTU/nerv.git cd nerv -git submodule init && git submodule update +git clone https://github.com/Nerv-SJTU/nerv-speech.git make ``` The `git submodule` command is for the __luajit__ repository inside __Nerv__. diff --git a/speech b/speech deleted file mode 160000 index c6c6442..0000000 --- a/speech +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c6c644223dc9168befd189bf6f33243390671c99 -- cgit v1.2.3-70-g09d2 From 2dc87bc02a1242dd5e029d0baaf4e0ae7173184f Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 6 Aug 2015 10:34:43 +0800 Subject: ... --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index efec2c4..be8e5b0 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ __Nerv__ is currently developed via github.You can download and make __Nerv__ by cd ~ git clone https://github.com/Nerv-SJTU/nerv.git cd nerv -git clone https://github.com/Nerv-SJTU/nerv-speech.git +git clone https://github.com/Nerv-SJTU/nerv-speech.git speech +git submodule init && git submodule update make ``` The `git submodule` command is for the __luajit__ repository inside __Nerv__. 
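
Net effect of the two README edits in this commit and the follow-up below: the main checkout moves to the Nerv-SJTU organization, nerv-speech is cloned as a plain repository into speech/ rather than tracked as a submodule, and `git submodule init && git submodule update` remains only for the luajit-2.0 and luarocks submodules still listed in .gitmodules. The build steps the README converges on:

```
cd ~
git clone https://github.com/Nerv-SJTU/nerv.git
cd nerv
git clone https://github.com/Nerv-SJTU/nerv-speech.git speech
git submodule init && git submodule update
make
```
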
-- cgit v1.2.3-70-g09d2 From b4d9cfa8e3a4735687311577dded97d889340134 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 6 Aug 2015 14:08:26 +0800 Subject: make network configuration example file clearer --- embedding_example/main.c | 2 +- embedding_example/setup_nerv.lua | 5 +- embedding_example/swb_baseline_decode.lua | 114 --------------------- nerv/doc/gh-pages | 1 + nerv/examples/asr_trainer.lua | 11 +- nerv/examples/swb_baseline.lua | 68 ++++++++++--- nerv/examples/swb_baseline_basic.lua | 161 ++++++++++++++++++++++++++++++ nerv/nn/layer_dag.lua | 3 +- nerv/nn/layer_repo.lua | 8 +- 9 files changed, 230 insertions(+), 143 deletions(-) delete mode 100644 embedding_example/swb_baseline_decode.lua create mode 160000 nerv/doc/gh-pages create mode 100644 nerv/examples/swb_baseline_basic.lua diff --git a/embedding_example/main.c b/embedding_example/main.c index b3c9bf2..8856d58 100644 --- a/embedding_example/main.c +++ b/embedding_example/main.c @@ -22,7 +22,7 @@ void setup_nerv() { luaL_openlibs(L); luaL_loadfile(L, "setup_nerv.lua"); /* network configuration */ - lua_pushstring(L, "swb_baseline_decode.lua"); + lua_pushstring(L, "../nerv/examples/swb_baseline.lua"); if (lua_pcall(L, 1, LUA_MULTRET, 0)) { printf("%s\n", luaL_checkstring(L, 1)); diff --git a/embedding_example/setup_nerv.lua b/embedding_example/setup_nerv.lua index 49a5dd6..5ade950 100644 --- a/embedding_example/setup_nerv.lua +++ b/embedding_example/setup_nerv.lua @@ -4,9 +4,8 @@ local arg = {...} dofile(arg[1]) local param_repo = nerv.ParamRepo() param_repo:import(gconf.initialized_param, nil, gconf) -local sublayer_repo = make_sublayer_repo(param_repo) -local layer_repo = make_layer_repo(sublayer_repo, param_repo) -local network = get_network(layer_repo) +local layer_repo = make_layer_repo(param_repo) +local network = get_decode_network(layer_repo) local global_transf = get_global_transf(layer_repo) local batch_size = 1 network:init(batch_size) diff --git a/embedding_example/swb_baseline_decode.lua b/embedding_example/swb_baseline_decode.lua deleted file mode 100644 index 8cdb320..0000000 --- a/embedding_example/swb_baseline_decode.lua +++ /dev/null @@ -1,114 +0,0 @@ -require 'htk_io' -gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - frm_ext = 5, - tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", - htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", - "/slfs1/users/mfy43/swb_global_transf.nerv"}, - debug = false} - -function make_sublayer_repo(param_repo) - return nerv.LayerRepo( - { - -- global transf - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - }, - -- biased linearity - ["nerv.AffineLayer"] = - { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = 
{2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} - }, - ["nerv.SigmoidLayer"] = - { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} - }, - ["nerv.SoftmaxLayer"] = - { - soutput = {{}, {dim_in = {3001}, dim_out = {3001}}} - } - }, param_repo, gconf) -end - -function make_layer_repo(sublayer_repo, param_repo) - return nerv.LayerRepo( - { - ["nerv.DAGLayer"] = - { - global_transf = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = sublayer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }}, - main = {{}, { - dim_in = {429}, dim_out = {3001}, - sub_layers = sublayer_repo, - connections = { - ["[1]"] = "affine0[1]", - ["affine0[1]"] = "sigmoid0[1]", - ["sigmoid0[1]"] = "affine1[1]", - ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "affine2[1]", - ["affine2[1]"] = "sigmoid2[1]", - ["sigmoid2[1]"] = "affine3[1]", - ["affine3[1]"] = "sigmoid3[1]", - ["sigmoid3[1]"] = "affine4[1]", - ["affine4[1]"] = "sigmoid4[1]", - ["sigmoid4[1]"] = "affine5[1]", - ["affine5[1]"] = "sigmoid5[1]", - ["sigmoid5[1]"] = "affine6[1]", - ["affine6[1]"] = "sigmoid6[1]", - ["sigmoid6[1]"] = "affine7[1]", - ["affine7[1]"] = "soutput[1]", - ["soutput[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) -end - -function get_network(layer_repo) - return layer_repo:get_layer("main") -end - - -function get_global_transf(layer_repo) - return layer_repo:get_layer("global_transf") -end diff --git a/nerv/doc/gh-pages b/nerv/doc/gh-pages new file mode 160000 index 0000000..195d95b --- /dev/null +++ b/nerv/doc/gh-pages @@ -0,0 +1 @@ +Subproject commit 195d95bb663258e3b1c3962f946db9c374018a8c diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 8dfb2ac..dcadfa3 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -1,8 +1,7 @@ function build_trainer(ifname) local param_repo = nerv.ParamRepo() param_repo:import(ifname, nil, gconf) - local sublayer_repo = make_sublayer_repo(param_repo) - local layer_repo = make_layer_repo(sublayer_repo, param_repo) + local layer_repo = make_layer_repo(param_repo) local network = get_network(layer_repo) local input_order = get_input_order() local iterative_trainer = function (prefix, scp_file, bp) @@ -18,7 +17,7 @@ function build_trainer(ifname) -- prine stat periodically gconf.cnt = gconf.cnt + 1 if gconf.cnt == 1000 then - print_stat(sublayer_repo) + print_stat(layer_repo) nerv.CuMatrix.print_profile() nerv.CuMatrix.clear_profile() gconf.cnt = 0 @@ -42,16 +41,16 @@ function build_trainer(ifname) -- collect garbage in-time to save GPU memory collectgarbage("collect") end - print_stat(sublayer_repo) + print_stat(layer_repo) nerv.CuMatrix.print_profile() nerv.CuMatrix.clear_profile() if (not bp) and prefix ~= nil then nerv.info("writing back...") local fname = string.format("%s_cv%.3f.nerv", - prefix, 
get_accuracy(sublayer_repo)) + prefix, get_accuracy(layer_repo)) network:get_params():export(fname, nil) end - return get_accuracy(sublayer_repo) + return get_accuracy(layer_repo) end return iterative_trainer end diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 7783f2a..0e9f897 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -10,8 +10,8 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, "/slfs1/users/mfy43/swb_global_transf.nerv"}, debug = false} -function make_sublayer_repo(param_repo) - return nerv.LayerRepo( +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( { -- global transf ["nerv.BiasLayer"] = @@ -54,21 +54,23 @@ function make_sublayer_repo(param_repo) sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} }, - ["nerv.SoftmaxCELayer"] = + ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output { ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {{}, {dim_in = {3001}, dim_out = {3001}}} } }, param_repo, gconf) -end -function make_layer_repo(sublayer_repo, param_repo) - return nerv.LayerRepo( + layer_repo:add_layers( { ["nerv.DAGLayer"] = { global_transf = {{}, { dim_in = {429}, dim_out = {429}, - sub_layers = sublayer_repo, + sub_layers = layer_repo, connections = { ["[1]"] = "blayer1[1]", ["blayer1[1]"] = "wlayer1[1]", @@ -78,8 +80,8 @@ function make_layer_repo(sublayer_repo, param_repo) } }}, main = {{}, { - dim_in = {429, 1}, dim_out = {1}, - sub_layers = sublayer_repo, + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, connections = { ["[1]"] = "affine0[1]", ["affine0[1]"] = "sigmoid0[1]", @@ -96,17 +98,51 @@ function make_layer_repo(sublayer_repo, param_repo) ["sigmoid5[1]"] = "affine6[1]", ["affine6[1]"] = "sigmoid6[1]", ["sigmoid6[1]"] = "affine7[1]", - ["affine7[1]"] = "ce_crit[1]", + ["affine7[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + ce_output = {{}, { + dim_in = {429, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "ce_crit[1]", ["[2]"] = "ce_crit[2]", ["ce_crit[1]"] = "[1]" } + }}, + softmax_output = {{}, { + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "[1]" + } }} } }, param_repo, gconf) + + return layer_repo end function get_network(layer_repo) - return layer_repo:get_layer("main") + return layer_repo:get_layer("ce_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") end function make_readers(scp_file, layer_repo) @@ -145,18 +181,18 @@ function get_input_order() return {"main_scp", "phone_state"} end -function get_accuracy(sublayer_repo) - local ce_crit = sublayer_repo:get_layer("ce_crit") +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") return ce_crit.total_correct / ce_crit.total_frames * 100 end -function print_stat(sublayer_repo) - local ce_crit = sublayer_repo:get_layer("ce_crit") +function print_stat(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") nerv.info("*** training stat begin ***") nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) 
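-- note: total_ce, total_correct and total_frames are counters accumulated by
-- the "ce_crit" SoftmaxCELayer during propagation, so the accuracy reported
-- below is frame-level classification accuracy; with made-up numbers, say
-- total_correct = 600 over total_frames = 1000, get_accuracy() returns
-- 600 / 1000 * 100 = 60.000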
nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) - nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(sublayer_repo)) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) nerv.info("*** training stat end ***") end diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua new file mode 100644 index 0000000..c47ec3e --- /dev/null +++ b/nerv/examples/swb_baseline_basic.lua @@ -0,0 +1,161 @@ +require 'htk_io' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", + htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", + "/slfs1/users/mfy43/swb_global_transf.nerv"}, + debug = false} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxCELayer"] = + { + ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }}, + main = {{}, { + dim_in = {429, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + 
["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "ce_crit[1]", + ["[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("main") +end + +function make_readers(scp_file, layer_repo) + return { + {reader = nerv.TNetReader(gconf, + { + id = "main_scp", + scp_file = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", + format = "map", + format_arg = "/slfs1/users/mfy43/swb_ivec/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("global_transf") + }), + data = {main_scp = 429, phone_state = 1}} + } +end + +function make_buffer(readers) + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + randomize = gconf.randomize, + readers = readers + }) +end + +function get_input_order() + return {"main_scp", "phone_state"} +end + +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index e5c1ac7..e9d4d86 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -112,7 +112,7 @@ function DAGLayer:__init(id, global_conf, layer_conf) end end for i = 1, #queue do - nerv.info("enqueued layer: %s", queue[i].layer.id) + nerv.info("enqueued layer: %s %s", queue[i].layer, queue[i].layer.id) end for id, ref in pairs(layers) do @@ -125,6 +125,7 @@ function DAGLayer:__init(id, global_conf, layer_conf) self.layers = layers self.inputs = inputs self.outputs = outputs + self.id = id self.dim_in = dim_in self.dim_out = dim_out self.parsed_conn = parsed_conn diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua index 602c37c..ef333a7 100644 --- a/nerv/nn/layer_repo.lua +++ b/nerv/nn/layer_repo.lua @@ -1,7 +1,12 @@ local LayerRepo = nerv.class("nerv.LayerRepo") function LayerRepo:__init(layer_spec, param_repo, global_conf) - local layers = {} + self.layers = {} + self:add_layers(layer_spec, param_repo, global_conf) +end + +function LayerRepo:add_layers(layer_spec, param_repo, global_conf) + local layers = self.layers for ltype, llist in pairs(layer_spec) do local layer_type = nerv.get_type(ltype) for id, spec in pairs(llist) do @@ -22,7 +27,6 @@ function LayerRepo:__init(layer_spec, param_repo, global_conf) layers[id] = layer_type(id, global_conf, layer_config) end end - self.layers = layers end function LayerRepo:get_layer(lid) -- cgit v1.2.3-70-g09d2 From 7082ba094be6ccbf97cfaf893ea437f31ced197b Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 6 Aug 2015 19:47:59 +0800 Subject: ... 
--- nerv/doc/gh-pages | 1 - 1 file changed, 1 deletion(-) delete mode 160000 nerv/doc/gh-pages diff --git a/nerv/doc/gh-pages b/nerv/doc/gh-pages deleted file mode 160000 index 195d95b..0000000 --- a/nerv/doc/gh-pages +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 195d95bb663258e3b1c3962f946db9c374018a8c -- cgit v1.2.3-70-g09d2 From 4b3e8591816e553a4409f5fa95f5983e59ff711f Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 15:02:34 +0800 Subject: add profiling for copy_rows_fromh_by_idx --- nerv/Makefile | 2 +- nerv/io/sgd_buffer.lua | 4 +++- nerv/lib/matrix/generic/cumatrix.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/nerv/Makefile b/nerv/Makefile index 728d010..0b433d5 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ io/sgd_buffer.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK -CUDA_BASE := /usr/local/cuda-6.5 +CUDA_BASE := /usr/local/cuda-7.0 #CUDA_BASE := /usr/local/cuda-5.0 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index f4f7dfe..604fa07 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -41,7 +41,7 @@ function SGDBuffer:saturate() buff.data:copy_from(buff.leftover, 0, lrow) buff.leftover = nil end - nerv.printf("leftover: %d\n", lrow) + nerv.printf("buffer leftover: %d\n", lrow) reader.tail = lrow reader.has_leftover = false end @@ -87,9 +87,11 @@ end function SGDBuffer:get_data() local batch_size = self.gconf.batch_size if self.head >= self.tail then -- buffer is empty + local t = os.clock() if not self:saturate() then return nil -- the remaining data cannot build a batch end + nerv.info("%.3fs to fill the buffer\n", os.clock() - t) end if self.head + batch_size > self.tail then return nil -- the remaining data cannot build a batch diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 40a0030..2cb3563 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -321,6 +321,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); if (a->ncol != b->ncol) NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + PROFILE_START cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow); for (i = 0; i < nrow; i++) { @@ -339,6 +340,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]), status); } free(streams); + PROFILE_STOP NERV_SET_STATUS(status, NERV_NORMAL, 0); } -- cgit v1.2.3-70-g09d2 From 6cad1b1947fb2ba237b0e843cb7900cdc1653294 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 15:22:07 +0800 Subject: use default cuda library path --- nerv/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerv/Makefile b/nerv/Makefile index 0b433d5..b5d26bd 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -36,8 +36,8 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ io/sgd_buffer.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK -CUDA_BASE := /usr/local/cuda-7.0 -#CUDA_BASE := /usr/local/cuda-5.0 +#CUDA_BASE := /usr/local/cuda-7.0 +CUDA_BASE := /usr/local/cuda CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) -- cgit v1.2.3-70-g09d2 From 47dba09eeba2463a804e89c9d0aed7b30cc92b4e Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 25 Aug 2015 11:38:57 +0800 Subject: use more general implementation for mat:create 
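The diff below drops the per-type C binding `nerv_matrix_(lua_create)` in favor of a single Lua method on the `nerv.Matrix` base class, so the CuMatrix and MMatrix variants of every precision share one implementation. A minimal usage sketch (assuming the CUDA matrix types are loaded):

local a = nerv.CuMatrixFloat(4, 8)
local b = a:create() -- a fresh 4x8 matrix of the same concrete type as `a`
b:fill(0)
-- after the follow-up commit ed2a414 the shape becomes overridable:
local c = a:create(2) -- 2x8: rows given explicitly, columns inherited from `a`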
--- nerv/matrix/generic/cumatrix.c | 10 ---------- nerv/matrix/init.lua | 4 ++++ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c index 4bdf5f0..ab7f7c4 100644 --- a/nerv/matrix/generic/cumatrix.c +++ b/nerv/matrix/generic/cumatrix.c @@ -43,15 +43,6 @@ static int nerv_matrix_(lua_mul)(lua_State *L) { return 0; } -static int nerv_matrix_(lua_create)(lua_State *L) { - Status status; - Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); - Matrix *b = nerv_matrix_(create)(a->nrow, a->ncol, &status); - NERV_LUA_CHECK_STATUS(L, status); - luaT_pushudata(L, b, nerv_matrix_(tname)); - return 1; -} - static int nerv_matrix_(lua_sigmoid)(lua_State *L) { Status status; Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); @@ -289,7 +280,6 @@ static int nerv_matrix_(lua_scale_rows_by_row)(lua_State *L) { } static const luaL_Reg nerv_matrix_(extra_methods)[] = { - {"create", nerv_matrix_(lua_create)}, {"colsum", nerv_matrix_(lua_colsum)}, {"colsame", nerv_matrix_(lua_colsame)}, {"rowsum", nerv_matrix_(lua_rowsum)}, diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua index f230e9f..ae9b884 100644 --- a/nerv/matrix/init.lua +++ b/nerv/matrix/init.lua @@ -45,6 +45,10 @@ function nerv.Matrix:generate(gen) end end +function nerv.Matrix:create() + return self.__constructor(self:nrow(), self:ncol()) +end + nerv.MMatrixInt.fmt = "%d " function nerv.CuMatrix:__add__(b) -- cgit v1.2.3-70-g09d2 From ed2a4148dbb9c18f428571b3e2970d7b2adfb058 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 25 Aug 2015 11:47:47 +0800 Subject: add optional parameters to mat:create --- nerv/matrix/init.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua index ae9b884..1091d7e 100644 --- a/nerv/matrix/init.lua +++ b/nerv/matrix/init.lua @@ -45,8 +45,8 @@ function nerv.Matrix:generate(gen) end end -function nerv.Matrix:create() - return self.__constructor(self:nrow(), self:ncol()) +function nerv.Matrix:create(nrow, ncol) + return self.__constructor(nrow or self:nrow(), ncol or self:ncol()) end nerv.MMatrixInt.fmt = "%d " -- cgit v1.2.3-70-g09d2 From e81e9832ec4f2ad031fd42b5018cea134e8cda7e Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 26 Aug 2015 14:26:54 +0800 Subject: move global_transf to asr_trainer.lua --- nerv/examples/asr_trainer.lua | 23 +++++++++++++++++++---- nerv/examples/swb_baseline.lua | 7 ++++--- nerv/examples/swb_baseline_basic.lua | 7 ++++--- nerv/io/sgd_buffer.lua | 2 +- nerv/layer/mse.lua | 2 +- nerv/nn/layer_dag.lua | 27 +++++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 12 deletions(-) diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index dcadfa3..5a50542 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -3,6 +3,7 @@ function build_trainer(ifname) param_repo:import(ifname, nil, gconf) local layer_repo = make_layer_repo(param_repo) local network = get_network(layer_repo) + local global_transf = get_global_transf(layer_repo) local input_order = get_input_order() local iterative_trainer = function (prefix, scp_file, bp) gconf.randomize = bp @@ -24,15 +25,29 @@ function build_trainer(ifname) -- break end local input = {} --- if gconf.cnt == 100 then break end - for i, id in ipairs(input_order) do +-- if gconf.cnt == 1000 then break end + for i, e in ipairs(input_order) do + local id = e.id if data[id] == nil then nerv.error("input data %s not found", id) end - table.insert(input, 
data[id]) + local transformed + if e.global_transf then + transformed = nerv.speech_utils.global_transf(data[id], + global_transf, + gconf.frm_ext or 0, + gconf.frm_trim or 0, + gconf) + else + transformed = data[id] + end + table.insert(input, transformed) end local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)} - err_output = {input[1]:create()} + err_output = {} + for i = 1, #input do + table.insert(err_output, input[i]:create()) + end network:propagate(input, output) if bp then network:back_propagate(err_input, err_output, input, output) diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 0e9f897..bbc6467 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -3,6 +3,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, frm_ext = 5, + frm_trim = 5, tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", @@ -161,8 +162,7 @@ function make_readers(scp_file, layer_repo) dir = "*/", ext = "lab" } - }, - global_transf = layer_repo:get_layer("global_transf") + } }), data = {main_scp = 429, phone_state = 1}} } @@ -178,7 +178,8 @@ function make_buffer(readers) end function get_input_order() - return {"main_scp", "phone_state"} + return {{id = "main_scp", global_transf = true}, + {id = "phone_state"}} end function get_accuracy(layer_repo) diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua index c47ec3e..71f04a3 100644 --- a/nerv/examples/swb_baseline_basic.lua +++ b/nerv/examples/swb_baseline_basic.lua @@ -3,6 +3,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, frm_ext = 5, + frm_trim = 5, tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", @@ -124,8 +125,7 @@ function make_readers(scp_file, layer_repo) dir = "*/", ext = "lab" } - }, - global_transf = layer_repo:get_layer("global_transf") + } }), data = {main_scp = 429, phone_state = 1}} } @@ -141,7 +141,8 @@ function make_buffer(readers) end function get_input_order() - return {"main_scp", "phone_state"} + return {{id = "main_scp", global_transf = true}, + {id = "phone_state"}} end function get_accuracy(layer_repo) diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index 604fa07..f9d281c 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -91,7 +91,7 @@ function SGDBuffer:get_data() if not self:saturate() then return nil -- the remaining data cannot build a batch end - nerv.info("%.3fs to fill the buffer\n", os.clock() - t) + nerv.info("%.3fs to fill the buffer", os.clock() - t) end if self.head + batch_size > self.tail then return nil -- the remaining data cannot build a batch diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 9a97add..2516998 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -34,7 +34,7 @@ function MSELayer:propagate(input, output) if output[1] ~= nil then output[1]:copy_fromd(mse_sum) end - self.total_mse = self.total_mse + mse_sum:colsum()[0] + self.total_mse = self.total_mse + mse_sum:colsum()[0][0] self.total_frames = self.total_frames + mse_sum:nrow() end diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index e9d4d86..25297c2 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -254,3 +254,30 @@ function 
DAGLayer:get_params() end return nerv.ParamRepo.merge(param_repos) end + +DAGLayer.PORT_TYPES = { + INPUT = {}, + OUTPUT = {}, + ERR_INPUT = {}, + ERR_OUTPUT = {} +} + +function DAGLayer:get_intermediate(id, port_type) + if id == "<input>" or id == "<output>" then + nerv.error("an actual real layer id is expected") + end + local layer = layers[id] + if layer == nil then + nerv.error("layer id %s not found", id) + end + if port_type == DAGLayer.PORT_TYPES.INPUT then + return layer.inputs + elseif port_type == DAGLayer.PORT_TYPES.OUTPUT then + return layer.outputs + elseif port_type == DAGLayer.PORT_TYPES.ERR_INPUT then + return layer.err_inputs + elseif port_type == DAGLayer.PORT_TYPES.ERR_OUTPUT then + return layer.err_outputs + end + nerv.error("unrecognized port type") +end -- cgit v1.2.3-70-g09d2 From e97b97e4c684e7f26064bcc0a6440ac5d6cddc47 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 26 Aug 2015 15:43:00 +0800 Subject: ... --- nerv/examples/asr_trainer.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 5a50542..69cfeed 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -35,8 +35,7 @@ function build_trainer(ifname) if e.global_transf then transformed = nerv.speech_utils.global_transf(data[id], global_transf, - gconf.frm_ext or 0, - gconf.frm_trim or 0, + gconf.frm_ext or 0, 0, gconf) else transformed = data[id] -- cgit v1.2.3-70-g09d2 From 29d1c98bd5dc7608919071efd430fd3a9ac6dc58 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Thu, 27 Aug 2015 13:29:29 +0800 Subject: . --- nerv/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerv/Makefile b/nerv/Makefile index 728d010..7ed140d 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -36,8 +36,8 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ io/sgd_buffer.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK -CUDA_BASE := /usr/local/cuda-6.5 -#CUDA_BASE := /usr/local/cuda-5.0 +#CUDA_BASE := /usr/local/cuda-6.5 +CUDA_BASE := /usr/local/cuda-5.0 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) -- cgit v1.2.3-70-g09d2 From 8bf9c7575ffeeabb3924e9e02a35afe187071fe2 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 27 Aug 2015 17:38:09 +0800 Subject: update embedding example to use new global_transf() --- embedding_example/setup_nerv.lua | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/embedding_example/setup_nerv.lua b/embedding_example/setup_nerv.lua index 5ade950..d80c306 100644 --- a/embedding_example/setup_nerv.lua +++ b/embedding_example/setup_nerv.lua @@ -11,12 +11,11 @@ local batch_size = 1 network:init(batch_size) function propagator(input, output) - local transformed = nerv.speech_utils.global_transf(input, - global_transf, 0, gconf) -- preprocessing - local gpu_input = nerv.CuMatrixFloat(transformed:nrow(), transformed:ncol()) + local transformed = nerv.speech_utils.global_transf( + gconf.cumat_type.new_from_host(input), + global_transf, 0, 0, gconf) -- preprocessing + local gpu_input = transformed local gpu_output = nerv.CuMatrixFloat(output:nrow(), output:ncol()) - print(transformed) - gpu_input:copy_fromh(transformed) network:propagate({gpu_input}, {gpu_output}) gpu_output:copy_toh(output) -- collect garbage in-time to save GPU memory -- cgit v1.2.3-70-g09d2 From 1a9f63e351582f54fec7817927168cb1dbb0c1d6 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 28 Aug 2015 13:21:52 +0800 Subject: support gpu buffering ---
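Summary of the change: when `use_gpu` is set, `SGDBuffer` keeps its staging matrices in device memory (`cumat_type`), builds the shuffling permutation as a float row vector uploaded from `MMatrixFloat.perm_gen`, and randomizes batches with the new device-side `copy_rows_fromd_by_idx` kernel instead of the per-row host-to-device stream copies. Enabling it is a one-line addition in the buffer spec (mirroring `make_buffer` in the diff below; `gconf` and `readers` as constructed elsewhere in the example):

local buffer = nerv.SGDBuffer(gconf,
    {
        buffer_size = gconf.buffer_size,
        randomize = gconf.randomize,
        readers = readers,
        use_gpu = true -- buffer and shuffle entirely on the GPU
    })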
nerv/examples/swb_baseline.lua | 3 ++- nerv/io/sgd_buffer.lua | 34 +++++++++++++++++++-------- nerv/lib/matrix/cukernel.h | 2 ++ nerv/lib/matrix/cumatrix.c | 1 + nerv/lib/matrix/cumatrix.h | 1 + nerv/lib/matrix/generic/cukernel.cu | 20 ++++++++++++++++ nerv/lib/matrix/generic/cumatrix.c | 19 +++++++++++++-- nerv/lib/matrix/generic/cumatrix.h | 2 ++ nerv/lib/matrix/mmatrix.c | 37 ++++++++++++++--------------- nerv/lib/matrix/mmatrix.h | 3 ++- nerv/matrix/generic/cumatrix.c | 22 ++++++++++++++++-- nerv/matrix/mmatrix.c | 46 +++++++++++++++++++------------------ 12 files changed, 135 insertions(+), 55 deletions(-) diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index bbc6467..8015884 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -173,7 +173,8 @@ function make_buffer(readers) { buffer_size = gconf.buffer_size, randomize = gconf.randomize, - readers = readers + readers = readers, + use_gpu = true }) end diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index f9d281c..3f854f0 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -8,13 +8,29 @@ function SGDBuffer:__init(global_conf, buffer_conf) if self.randomize == nil then self.randomize = false end + local cumat_type = global_conf.cumat_type + if buffer_conf.use_gpu then + self.mat_type = cumat_type + self.copy_rows_from_by_idx = cumat_type.copy_rows_fromd_by_idx + self.copy_from = cumat_type.copy_fromd + self.copy_from_reader = cumat_type.copy_fromh + self.perm_gen = function (x) + return cumat_type.new_from_host(nerv.MMatrixFloat.perm_gen(x)) + end + else + self.mat_type = global_conf.mmat_type + self.copy_rows_from_by_idx = cumat_type.copy_rows_fromh_by_idx + self.copy_from = cumat_type.copy_fromh + self.perm_gen = nerv.MMatrixFloat.perm_gen + self.copy_from_reader = self.mat_type.copy_from + end self.head = 0 self.tail = 0 self.readers = {} for i, reader_spec in ipairs(buffer_conf.readers) do local buffs = {} for id, width in pairs(reader_spec.data) do - buffs[id] = {data = global_conf.mmat_type(self.buffer_size, width), + buffs[id] = {data = self.mat_type(self.buffer_size, width), leftover = nil, width = width} end @@ -41,7 +57,7 @@ function SGDBuffer:saturate() buff.data:copy_from(buff.leftover, 0, lrow) buff.leftover = nil end - nerv.printf("buffer leftover: %d\n", lrow) + nerv.info("buffer leftover: %d\n", lrow) reader.tail = lrow reader.has_leftover = false end @@ -65,21 +81,21 @@ function SGDBuffer:saturate() if d == nil then nerv.error("reader does not provide data for %s", id) end - buff.leftover = self.gconf.mmat_type(drow - remain, - buff.width) - buff.leftover:copy_from(d, remain, drow) + buff.leftover = self.mat_type(drow - remain, + buff.width) + self.copy_from_reader(buff.leftover, d, remain, drow) end drow = remain reader.has_leftover = true end for id, buff in pairs(reader.buffs) do - buff.data:copy_from(data[id], 0, drow, reader.tail) + self.copy_from_reader(buff.data, data[id], 0, drow, reader.tail) end reader.tail = reader.tail + drow end self.tail = math.min(self.tail, reader.tail) end - self.rand_map = nerv.MMatrixInt.perm_gen(self.tail) -- generate shuffled index + self.rand_map = self.perm_gen(self.tail) -- generate shuffled index collectgarbage("collect") return self.tail >= self.gconf.batch_size end @@ -101,9 +117,9 @@ function SGDBuffer:get_data() for id, buff in pairs(reader.buffs) do local batch = self.gconf.cumat_type(batch_size, buff.width) if self.randomize then - batch:copy_rows_fromh_by_idx(buff.data, self.rand_map, 
self.head) + self.copy_rows_from_by_idx(batch, buff.data, self.rand_map, self.head) else - batch:copy_fromh(buff.data, self.head, self.head + batch_size) + self.copy_from(batch, buff.data, self.head, self.head + batch_size) end res[id] = batch end diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h index 7bb4c2c..2126c6f 100644 --- a/nerv/lib/matrix/cukernel.h +++ b/nerv/lib/matrix/cukernel.h @@ -18,4 +18,6 @@ void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step); void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b); void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b); void cudak_(cuda_decompress)(const Matrix *a, Matrix *b); +void cudak_(cuda_copy_rows_by_idx)(const Matrix *a, Matrix *b, + const Matrix *idx, int b_begin); #endif diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index aa81bfc..ff1168d 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -49,6 +49,7 @@ void nerv_cumatrix_init() { #define NERV_CUBLAS_(NAME) cublasS##NAME #define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_float_tname #include "generic/cumatrix.c" + #undef NERV_CUBLAS_ #undef cudak_ #undef nerv_matrix_ diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h index 9f71507..e6def66 100644 --- a/nerv/lib/matrix/cumatrix.h +++ b/nerv/lib/matrix/cumatrix.h @@ -1,5 +1,6 @@ #ifndef NERV_CUMATRIX_H #define NERV_CUMATRIX_H +#include "matrix.h" void nerv_cumatrix_print_profile(); void nerv_cumatrix_clear_profile(); void nerv_cumatrix_init(); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index e337798..08feb59 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -284,6 +284,15 @@ __global__ void cudak_(gen_col_idx)(MATRIX_ELEM *b, b[j + i * stride] = j; } +__global__ void cudak_(copy_rows_by_idx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + const MATRIX_ELEM *idx, int b_begin, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] = a[j + lrintf(idx[i]) * stride]; +} + extern "C" { #include "../cukernel.h" void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b) { @@ -589,5 +598,16 @@ extern "C" { b->stride / sizeof(MATRIX_ELEM)); cudaStreamSynchronize(0); } + + void cudak_(cuda_copy_rows_by_idx)(const Matrix *a, Matrix *b, + const Matrix *idx, int b_begin) { + dim3 threadsPerBlock(CUDA_THREADS_NN, 1); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), b->nrow); + cudak_(copy_rows_by_idx)<<>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + MATRIX_ELEM_PTR(idx) + b_begin, + b_begin, b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } } #endif diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 2cb3563..770e503 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -315,7 +315,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, long nrow = a->nrow; if (!(0 <= b_begin && b_begin + nrow <= idx->ncol)) NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0); - long *idx_ptr = idx->data.i; + float *idx_ptr = idx->data.f; int i; if (idx->nrow != 1) NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); @@ -325,7 +325,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow); for (i = 0; i < 
nrow; i++) { - int src_row = idx_ptr[b_begin + i]; + int src_row = (int)idx_ptr[b_begin + i]; if (!(0 <= src_row && src_row < b->nrow)) NERV_EXIT_STATUS(status, MAT_INVALID_IDX, 0); CUDA_SAFE_CALL(cudaStreamCreate(streams + i), status); @@ -344,6 +344,21 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, NERV_SET_STATUS(status, NERV_NORMAL, 0); } +void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b, + const Matrix *idx, int b_begin, Status *status) { + long nrow = a->nrow; + if (!(0 <= b_begin && b_begin + nrow <= idx->ncol)) + NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0); + if (idx->nrow != 1) + NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + PROFILE_START + cudak_(cuda_copy_rows_by_idx)(b, a, idx, b_begin); + PROFILE_STOP + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b, int context, Status *status) { if (a->nrow != b->nrow) diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h index 3f1f8a3..04e8c5a 100644 --- a/nerv/lib/matrix/generic/cumatrix.h +++ b/nerv/lib/matrix/generic/cumatrix.h @@ -40,6 +40,8 @@ void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status); Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status); void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, const Matrix *idx, int b_begin, Status *status); +void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b, + const Matrix *idx, int b_begin, Status *status); void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b, int context, Status *status); diff --git a/nerv/lib/matrix/mmatrix.c b/nerv/lib/matrix/mmatrix.c index 94f1ea8..b8157eb 100644 --- a/nerv/lib/matrix/mmatrix.c +++ b/nerv/lib/matrix/mmatrix.c @@ -7,6 +7,25 @@ #define nerv_matrix_(NAME) nerv_matrix_host_float_##NAME #include "generic/matrix.h" #include "generic/mmatrix.c" + +Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { + int i; + Matrix *self = nerv_matrix_(create)(1, ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + float *prow = self->data.f; + for (i = 0; i < ncol; i++) + prow[i] = i; + for (i = ncol - 1; i >= 0; i--) + { + size_t j = rand() % (i + 1); + float tmp = prow[i]; + prow[i] = prow[j]; + prow[j] = tmp; + } + return self; +} + #undef nerv_matrix_ #undef host_matrix_ #undef MATRIX_USE_FLOAT @@ -33,21 +52,3 @@ #define host_matrix_(NAME) host_matrix_int_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME #include "generic/mmatrix.c" - -Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { - int i; - Matrix *self = nerv_matrix_(create)(1, ncol, status); - if (status->err_code != NERV_NORMAL) - return NULL; - long *prow = self->data.i; - for (i = 0; i < ncol; i++) - prow[i] = i; - for (i = ncol - 1; i >= 0; i--) - { - size_t j = rand() % (i + 1); - long tmp = prow[i]; - prow[i] = prow[j]; - prow[j] = tmp; - } - return self; -} diff --git a/nerv/lib/matrix/mmatrix.h b/nerv/lib/matrix/mmatrix.h index df91e4c..31e7984 100644 --- a/nerv/lib/matrix/mmatrix.h +++ b/nerv/lib/matrix/mmatrix.h @@ -1,4 +1,5 @@ #ifndef NERV_MMATRIX_H #define NERV_MMATRIX_H -Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status); +#include "matrix.h" +Matrix *nerv_matrix_host_float_perm_gen(int ncol, Status *status); #endif diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c index ab7f7c4..08cb4c2 100644 --- 
a/nerv/matrix/generic/cumatrix.c +++ b/nerv/matrix/generic/cumatrix.c @@ -228,12 +228,12 @@ static int nerv_matrix_(lua_decompress)(lua_State *L) { return 1; } -extern const char *nerv_matrix_host_int_tname; +extern const char *nerv_matrix_host_float_tname; static int nerv_matrix_(lua_copy_rows_fromh_by_idx)(lua_State *L) { Status status; Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); const Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME); - const Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname); + const Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_float_tname); long nrow = a->nrow; int b_begin = lua_gettop(L) > 3 ? luaL_checkinteger(L, 4) : 0; nerv_matrix_(copy_rows_fromh_by_idx)(a, b, idx, b_begin, &status); @@ -241,6 +241,18 @@ static int nerv_matrix_(lua_copy_rows_fromh_by_idx)(lua_State *L) { return 0; } +static int nerv_matrix_(lua_copy_rows_fromd_by_idx)(lua_State *L) { + Status status; + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + const Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + const Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_(tname)); + long nrow = a->nrow; + int b_begin = lua_gettop(L) > 3 ? luaL_checkinteger(L, 4) : 0; + nerv_matrix_(copy_rows_fromd_by_idx)(a, b, idx, b_begin, &status); + NERV_LUA_CHECK_STATUS(L, status); + return 0; +} + static int nerv_matrix_(lua_expand_frm)(lua_State *L) { Status status; Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); @@ -290,6 +302,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { /* in-place calc */ {"copy_fromh", nerv_matrix_(lua_copy_fromh)}, {"copy_fromd", nerv_matrix_(lua_copy_fromd)}, + /* alias for copy_fromd */ + {"copy_from", nerv_matrix_(lua_copy_fromd)}, {"copy_toh", nerv_matrix_(lua_copy_toh)}, {"add", nerv_matrix_(lua_add)}, {"mul", nerv_matrix_(lua_mul)}, @@ -302,6 +316,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"mul_elem", nerv_matrix_(lua_mul_elem)}, {"log_elem", nerv_matrix_(lua_log_elem)}, {"copy_rows_fromh_by_idx", nerv_matrix_(lua_copy_rows_fromh_by_idx)}, + {"copy_rows_fromd_by_idx", nerv_matrix_(lua_copy_rows_fromd_by_idx)}, {"expand_frm", nerv_matrix_(lua_expand_frm)}, {"rearrange_frm", nerv_matrix_(lua_rearrange_frm)}, {"scale_rows_by_row", nerv_matrix_(lua_scale_rows_by_row)}, @@ -311,6 +326,9 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { static void cuda_matrix_(init)(lua_State *L) { luaN_append_methods(L, nerv_matrix_(extra_methods)); +#ifdef CUMATRIX_INIT + CUMATRIX_INIT(L); +#endif } int nerv_matrix_(lua_get_elem)(lua_State *L) { diff --git a/nerv/matrix/mmatrix.c b/nerv/matrix/mmatrix.c index 5561572..961059c 100644 --- a/nerv/matrix/mmatrix.c +++ b/nerv/matrix/mmatrix.c @@ -16,7 +16,30 @@ void nerv_lua_mmatrix_init(lua_State *L) { #define host_matrix_(NAME) host_matrix_float_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_float_##NAME const char *nerv_matrix_(tname) = "nerv.MMatrixFloat"; +#define MMATRIX_INIT(L) host_matrix_(init_extra)(L) + +static const luaL_Reg nerv_matrix_(extra_methods_int)[]; +static void host_matrix_(init_extra)(lua_State *L) { + luaN_append_methods(L, nerv_matrix_(extra_methods_int)); +} + #include "generic/mmatrix.c" +#include "../lib/matrix/mmatrix.h" + +static int nerv_matrix_(lua_perm_gen)(lua_State *L) { + Status status; + int i, ncol = luaL_checkinteger(L, 1); + Matrix *self = nerv_matrix_(perm_gen)(ncol, &status); + NERV_LUA_CHECK_STATUS(L, status); + luaT_pushudata(L, self, nerv_matrix_(tname)); + return 1; +} + +static const luaL_Reg 
nerv_matrix_(extra_methods_int)[] = { + {"perm_gen", nerv_matrix_(lua_perm_gen)}, + {NULL, NULL} +}; + #undef nerv_matrix_ #undef host_matrix_ #undef MATRIX_USE_FLOAT @@ -24,6 +47,7 @@ const char *nerv_matrix_(tname) = "nerv.MMatrixFloat"; #undef MATRIX_ELEM_PTR #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT +#undef MMATRIX_INIT #define NERV_GENERIC_MMATRIX #define MATRIX_USE_DOUBLE @@ -44,26 +68,4 @@ const char *nerv_matrix_(tname) = "nerv.MMatrixDouble"; #define host_matrix_(NAME) host_matrix_int_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME const char *nerv_matrix_(tname) = "nerv.MMatrixInt"; -#define MMATRIX_INIT(L) host_matrix_(init_extra)(L) - -static const luaL_Reg nerv_matrix_(extra_methods_int)[]; -static void host_matrix_(init_extra)(lua_State *L) { - luaN_append_methods(L, nerv_matrix_(extra_methods_int)); -} - #include "generic/mmatrix.c" -#include "../lib/matrix/mmatrix.h" - -static int nerv_matrix_(lua_perm_gen)(lua_State *L) { - Status status; - int i, ncol = luaL_checkinteger(L, 1); - Matrix *self = nerv_matrix_(perm_gen)(ncol, &status); - NERV_LUA_CHECK_STATUS(L, status); - luaT_pushudata(L, self, nerv_matrix_(tname)); - return 1; -} - -static const luaL_Reg nerv_matrix_(extra_methods_int)[] = { - {"perm_gen", nerv_matrix_(lua_perm_gen)}, - {NULL, NULL} -}; -- cgit v1.2.3-70-g09d2 From cad144243b898a7bed91c18572bf42944e9db3b3 Mon Sep 17 00:00:00 2001 From: Determinant Date: Sun, 30 Aug 2015 15:14:36 +0800 Subject: ... --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index be8e5b0..8c21bd9 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ cd nerv git clone https://github.com/Nerv-SJTU/nerv-speech.git speech git submodule init && git submodule update make +make speech ``` The `git submodule` command is for the __luajit__ repository inside __Nerv__. Now, you can try to run some example scripts. -- cgit v1.2.3-70-g09d2 From 3721c74d56ffdea43851489617f33cd13b87ab76 Mon Sep 17 00:00:00 2001 From: Determinant Date: Mon, 31 Aug 2015 18:59:22 +0800 Subject: ... 
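Two fixes below: `SGDBuffer` gains a `consume` flag so the tail of the data that cannot fill a whole `batch_size` is emitted as one final, smaller batch instead of being dropped, and `DAGLayer:get_intermediate` now indexes `self.layers` rather than an undefined `layers`. A sketch of the consume semantics, with hypothetical sizes:

-- batch_size = 256 with 1000 frames remaining:
--   consume = true  -> batches of 256, 256, 256, 232
--   consume = false -> batches of 256, 256, 256; the last 232 frames are skipped
local buffer = nerv.SGDBuffer(gconf,
    {
        buffer_size = gconf.buffer_size,
        randomize = gconf.randomize,
        consume = true,
        readers = readers
    })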
--- nerv/io/sgd_buffer.lua | 18 ++++++++++-------- nerv/nn/layer_dag.lua | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index 3f854f0..74c4934 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -5,9 +5,7 @@ function SGDBuffer:__init(global_conf, buffer_conf) self.buffer_size = math.floor(buffer_conf.buffer_size / global_conf.batch_size) * global_conf.batch_size self.randomize = buffer_conf.randomize - if self.randomize == nil then - self.randomize = false - end + self.consume = buffer_conf.consume local cumat_type = global_conf.cumat_type if buffer_conf.use_gpu then self.mat_type = cumat_type @@ -104,26 +102,30 @@ function SGDBuffer:get_data() local batch_size = self.gconf.batch_size if self.head >= self.tail then -- buffer is empty local t = os.clock() - if not self:saturate() then + if (not self:saturate()) and (not self.consume) then return nil -- the remaining data cannot build a batch end + if self.tail == self.head then + return nil -- nothing left + end nerv.info("%.3fs to fill the buffer", os.clock() - t) end - if self.head + batch_size > self.tail then + if self.head + batch_size > self.tail and (not self.consume) then return nil -- the remaining data cannot build a batch end + actual_batch_size = math.min(batch_size, self.tail - self.head) local res = {} for i, reader in ipairs(self.readers) do for id, buff in pairs(reader.buffs) do - local batch = self.gconf.cumat_type(batch_size, buff.width) + local batch = self.gconf.cumat_type(actual_batch_size, buff.width) if self.randomize then self.copy_rows_from_by_idx(batch, buff.data, self.rand_map, self.head) else - self.copy_from(batch, buff.data, self.head, self.head + batch_size) + self.copy_from(batch, buff.data, self.head, self.head + actual_batch_size) end res[id] = batch end end - self.head = self.head + batch_size + self.head = self.head + actual_batch_size return res end diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index 25297c2..f69d31c 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -266,7 +266,7 @@ function DAGLayer:get_intermediate(id, port_type) if id == "<input>" or id == "<output>" then nerv.error("an actual real layer id is expected") end - local layer = layers[id] + local layer = self.layers[id] if layer == nil then nerv.error("layer id %s not found", id) end -- cgit v1.2.3-70-g09d2 From 37286a08b40f68b544983d8dde4a77ac0b488397 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Fri, 18 Sep 2015 22:17:25 +0800 Subject: kaldi mpe training support --- nerv/Makefile | 2 +- nerv/examples/seq_chime.lua | 185 +++++++++++++++++++++++++++++++++++++++ nerv/examples/seq_trainer.lua | 86 ++++++++++++++++++ nerv/layer/affine.lua | 4 + nerv/layer/affine_recurrent.lua | 4 + nerv/layer/bias.lua | 4 + nerv/layer/combiner.lua | 6 ++ nerv/layer/init.lua | 1 + nerv/layer/mpe.lua | 52 +++++++++++ nerv/layer/mse.lua | 8 ++ nerv/layer/sigmoid.lua | 4 + nerv/layer/softmax.lua | 4 + nerv/layer/softmax_ce.lua | 7 ++ nerv/layer/window.lua | 4 + nerv/lib/matrix/generic/matrix.c | 1 + nerv/nn/layer_dag.lua | 40 ++++++++- 16 files changed, 407 insertions(+), 5 deletions(-) create mode 100644 nerv/examples/seq_chime.lua create mode 100644 nerv/examples/seq_trainer.lua create mode 100644 nerv/layer/mpe.lua diff --git a/nerv/Makefile b/nerv/Makefile index b5d26bd..b874a94 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -31,7 +31,7 @@ OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS) LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so
$(LIB_PATH)/libluaT.so LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \ - layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua\ + layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua layer/mpe.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua diff --git a/nerv/examples/seq_chime.lua b/nerv/examples/seq_chime.lua new file mode 100644 index 0000000..be723ca --- /dev/null +++ b/nerv/examples/seq_chime.lua @@ -0,0 +1,185 @@ +require 'kaldi_io' +gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "ark,s,cs:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_smbr/train.scp ark:- |", + initialized_param = {"/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_init.nerv", + "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf.nerv"}, + debug = false} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {440}, dim_out = {440}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {440}, dim_out = {440}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {440}, dim_out = {440}}}, + wlayer2 = {{window = "window2"}, {dim_in = {440}, dim_out = {440}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {440}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {2011}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.MPELayer"] = + { + mpe_crit = {{}, {dim_in = {2011, -1}, dim_out = {1}, + cmd = { + arg = "--class-frame-counts=/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced/ali_train_pdf.counts --acoustic-scale=0.1 --lm-scale=1.0 --learn-rate=0.00001 --do-smbr=true --verbose=1", + mdl = "/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/final.mdl", + lat = "scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_denlats/lat.scp", + ali = "ark:gunzip -c /slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/ali.*.gz |" + } + } + } + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + 
softmax = {{}, {dim_in = {2011}, dim_out = {2011}}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {440}, dim_out = {440}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }}, + main = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + mpe_output = {{}, { + dim_in = {440, -1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "mpe_crit[1]", + ["[2]"] = "mpe_crit[2]", + ["mpe_crit[1]"] = "[1]" + } + }}, + softmax_output = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("mpe_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(feature_rspecifier, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = feature_rspecifier, + frm_ext = gconf.frm_ext, + global_transf = layer_repo:get_layer("global_transf"), + mlfs = {} + }) + } + } +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "key"}} +end + +function get_accuracy(layer_repo) + local mpe_crit = layer_repo:get_layer("mpe_crit") + return mpe_crit.total_correct / mpe_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local mpe_crit = layer_repo:get_layer("mpe_crit") + nerv.info("*** training stat begin ***") + nerv.printf("correct:\t\t%d\n", mpe_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", mpe_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/examples/seq_trainer.lua b/nerv/examples/seq_trainer.lua new file mode 100644 index 0000000..df96e68 --- /dev/null +++ b/nerv/examples/seq_trainer.lua @@ -0,0 +1,86 @@ +function build_trainer(ifname) + local param_repo = nerv.ParamRepo() + param_repo:import(ifname, nil, gconf) + local layer_repo = make_layer_repo(param_repo) + local network = get_network(layer_repo) + local global_transf = get_global_transf(layer_repo) + local input_order = get_input_order() + local iterative_trainer = function (prefix, scp_file, bp) + local readers = make_readers(scp_file, layer_repo) + -- initialize the network + network:init(1) + gconf.cnt = 0 + for ri = 1, #readers, 1 do + while true do + local data = readers[ri].reader:get_data() + if data == nil then + break + end + -- 
prine stat periodically + gconf.cnt = gconf.cnt + 1 + if gconf.cnt == 1000 then + print_stat(layer_repo) + nerv.CuMatrix.print_profile() + nerv.CuMatrix.clear_profile() + gconf.cnt = 0 + -- break + end + local input = {} + -- if gconf.cnt == 1000 then break end + for i, e in ipairs(input_order) do + local id = e.id + if data[id] == nil then + nerv.error("input data %s not found", id) + end + local transformed + if e.global_transf then + local batch = gconf.cumat_type(data[id]:nrow(), data[id]:ncol()) + batch:copy_fromh(data[id]) + transformed = nerv.speech_utils.global_transf(batch, + global_transf, + gconf.frm_ext or 0, 0, + gconf) + else + transformed = data[id] + end + table.insert(input, transformed) + end + err_output = {input[1]:create()} + network:batch_resize(input[1]:nrow()) + if network:propagate(input, {{}}) == true then + network:back_propagate({{}}, err_output, input, {{}}) + network:update({{}}, input, {{}}) + end + -- collect garbage in-time to save GPU memory + collectgarbage("collect") + end + end + print_stat(layer_repo) + nerv.CuMatrix.print_profile() + nerv.CuMatrix.clear_profile() + if prefix ~= nil then + nerv.info("writing back...") + local fname = string.format("%s_tr%.3f.nerv", + prefix, get_accuracy(layer_repo)) + network:get_params():export(fname, nil) + end + return get_accuracy(layer_repo) + end + return iterative_trainer +end + +dofile(arg[1]) + +local pf0 = gconf.initialized_param +local trainer = build_trainer(pf0) + +local i = 1 +nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) +local accu_tr = trainer(string.format("%s_%s_iter_%d_lr%f", +string.gsub( +(string.gsub(pf0[1], "(.*/)(.*)", "%2")), +"(.*)%..*", "%1"), +os.date("%Y%m%d%H%M%S"), +i, gconf.lrate), gconf.tr_scp, true) +nerv.info("[TR] training set %d: %.3f", i, accu_tr) + diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 00cbcfb..6c90e3e 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -60,6 +60,10 @@ function AffineLayer:init(batch_size) self.bp:train_init() end +function AffineLayer:batch_resize(batch_size) + -- do nothing +end + function AffineLayer:update(bp_err, input, output) if self.direct_update then self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N') diff --git a/nerv/layer/affine_recurrent.lua b/nerv/layer/affine_recurrent.lua index 59d259c..92d98e2 100644 --- a/nerv/layer/affine_recurrent.lua +++ b/nerv/layer/affine_recurrent.lua @@ -37,6 +37,10 @@ function Recurrent:init(batch_size) self.bp:train_init() end +function Recurrent:batch_resize(batch_size) + -- do nothing +end + function Recurrent:update(bp_err, input, output) if (self.direct_update == true) then local ltp_hh = self.ltp_hh.trans diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index c99274d..7e9fd46 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -18,6 +18,10 @@ function BiasLayer:init() end end +function BiasLayer:batch_resize(batch_size) + -- do nothing +end + function BiasLayer:propagate(input, output) output[1]:copy_fromd(input[1]) output[1]:add_row(self.bias.trans, 1.0) diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua index 7bd7617..1bcfdfb 100644 --- a/nerv/layer/combiner.lua +++ b/nerv/layer/combiner.lua @@ -30,6 +30,12 @@ function CombinerLayer:init(batch_size) self.sum = self.gconf.cumat_type(batch_size, dim) end +function CombinerLayer:batch_resize(batch_size) + if self.sum:nrow() ~= batch_size then + self.sum = self.gconf.cumat_type(batch_size, self.dim_in[1]) + end +end + function 
CombinerLayer:update(bp_err, input, output) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 6861b0e..b74422f 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -79,3 +79,4 @@ nerv.include('mse.lua') nerv.include('combiner.lua') nerv.include('affine_recurrent.lua') nerv.include('softmax.lua') +nerv.include('mpe.lua') diff --git a/nerv/layer/mpe.lua b/nerv/layer/mpe.lua new file mode 100644 index 0000000..ec8a8f3 --- /dev/null +++ b/nerv/layer/mpe.lua @@ -0,0 +1,52 @@ +require 'libkaldiseq' +local MPELayer = nerv.class("nerv.MPELayer", "nerv.Layer") + +function MPELayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.arg = layer_conf.cmd.arg + self.mdl = layer_conf.cmd.mdl + self.lat = layer_conf.cmd.lat + self.ali = layer_conf.cmd.ali + self:check_dim_len(2, -1) -- two inputs: nn output and utt key +end + +function MPELayer:init(batch_size) + self.total_correct = 0 + self.total_frames = 0 + self.kaldi_mpe = nerv.KaldiMPE(self.arg, self.mdl, self.lat, self.ali) + if self.kaldi_mpe == nil then + nerv.error("kaldi arguments is expected: %s %s %s %s", self.arg, + self.mdl, self.lat, self.ali) + end +end + +function MPELayer:batch_resize(batch_size) + -- do nothing +end + +function MPELayer:update(bp_err, input, output) + -- no params, therefore do nothing +end + +function MPELayer:propagate(input, output) + self.valid = false + self.valid = self.kaldi_mpe:check(input[1], input[2]) + return self.valid +end + +function MPELayer:back_propagate(bp_err, next_bp_err, input, output) + if self.valid ~= true then + nerv.error("kaldi sequence training back_propagate fail") + end + local mmat = input[1]:new_to_host() + next_bp_err[1]:copy_fromh(self.kaldi_mpe:calc_diff(mmat, input[2])) + self.total_frames = self.total_frames + self.kaldi_mpe:get_num_frames() + self.total_correct = self.total_correct + self.kaldi_mpe:get_utt_frame_acc() +end + +function MPELayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 2516998..0ee3080 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -20,6 +20,14 @@ function MSELayer:init(batch_size) self.diff = self.mse:create() end +function MSELayer:batch_resize(batch_size) + if self.mse:nrow() ~= batch_resize then + self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.diff = self.mse:create() + end +end + function MSELayer:update(bp_err, input, output) -- no params, therefore do nothing end diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua index dfd09eb..0a8bcdc 100644 --- a/nerv/layer/sigmoid.lua +++ b/nerv/layer/sigmoid.lua @@ -14,6 +14,10 @@ function SigmoidLayer:init() end end +function SigmoidLayer:batch_resize(batch_size) + -- do nothing +end + function SigmoidLayer:update(bp_err, input, output) -- no params, therefore do nothing end diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua index e979ebf..4205b66 100644 --- a/nerv/layer/softmax.lua +++ b/nerv/layer/softmax.lua @@ -14,6 +14,10 @@ function SoftmaxLayer:init(batch_size) end end +function SoftmaxLayer:batch_resize(batch_size) + -- do nothing +end + function SoftmaxLayer:update(bp_err, input, output) -- no params, therefore do nothing end diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua index f878a2f..9071e86 100644 --- a/nerv/layer/softmax_ce.lua +++ b/nerv/layer/softmax_ce.lua @@ -23,6 +23,13 @@ function 
SoftmaxCELayer:init(batch_size) self.ce = self.softmax:create() end +function SoftmaxCELayer:batch_resize(batch_size) + if self.softmax:nrow() ~= batch_size then + self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.ce = self.softmax:create() + end +end + function SoftmaxCELayer:update(bp_err, input, output) -- no params, therefore do nothing end diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 4e9a3b1..8eed352 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -18,6 +18,10 @@ function WindowLayer:init() end end +function WindowLayer:batch_resize(batch_size) + -- do nothing +end + function WindowLayer:propagate(input, output) output[1]:copy_fromd(input[1]) output[1]:scale_rows_by_row(self.window.trans) diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c index 6cb3dc0..4319e13 100644 --- a/nerv/lib/matrix/generic/matrix.c +++ b/nerv/lib/matrix/generic/matrix.c @@ -4,6 +4,7 @@ /* FIXME: malloc failure detection */ void nerv_matrix_(data_free)(Matrix *self, Status *status) { + if(*self->data_ref == 0) return; assert(*self->data_ref > 0); if (--(*self->data_ref) == 0) { diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index f69d31c..73bb77d 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -79,7 +79,7 @@ function DAGLayer:__init(id, global_conf, layer_conf) end table.insert(parsed_conn, - {{ref_from, port_from}, {ref_to, port_to}}) + {{ref_from, port_from}, {ref_to, port_to}}) table.insert(ref_from.next_layers, ref_to) -- add edge ref_to.in_deg = ref_to.in_deg + 1 -- increase the in-degree of the target layer end @@ -140,8 +140,11 @@ function DAGLayer:init(batch_size) ref_from, port_from = unpack(conn[1]) ref_to, port_to = unpack(conn[2]) _, output_dim = ref_from.layer:get_dim() - local mid = self.gconf.cumat_type(batch_size, - output_dim[port_from]) + local dim = 1 + if output_dim[port_from] > 0 then + dim = output_dim[port_from] + end + local mid = self.gconf.cumat_type(batch_size, dim) local err_mid = mid:create() ref_from.outputs[port_from] = mid @@ -176,6 +179,33 @@ function DAGLayer:init(batch_size) end end +function DAGLayer:batch_resize(batch_size) + self.gconf.batch_size = batch_size + + for i, conn in ipairs(self.parsed_conn) do + local _, output_dim + local ref_from, port_from, ref_to, port_to + ref_from, port_from = unpack(conn[1]) + ref_to, port_to = unpack(conn[2]) + _, output_dim = ref_from.layer:get_dim() + + if ref_from.outputs[port_from]:nrow() ~= batch_size and output_dim[port_from] > 0 then + local mid = self.gconf.cumat_type(batch_size, output_dim[port_from]) + local err_mid = mid:create() + + ref_from.outputs[port_from] = mid + ref_to.inputs[port_to] = mid + + ref_from.err_inputs[port_from] = err_mid + ref_to.err_outputs[port_to] = err_mid + end + end + for id, ref in pairs(self.layers) do + ref.layer:batch_resize(batch_size) + end + collectgarbage("collect") +end + function DAGLayer:set_inputs(input) for i = 1, #self.dim_in do if input[i] == nil then @@ -228,11 +258,13 @@ end function DAGLayer:propagate(input, output) self:set_inputs(input) self:set_outputs(output) + local ret = false for i = 1, #self.queue do local ref = self.queue[i] -- print(ref.layer.id) - ref.layer:propagate(ref.inputs, ref.outputs) + ret = ref.layer:propagate(ref.inputs, ref.outputs) end + return ret end function DAGLayer:back_propagate(bp_err, next_bp_err, input, output) -- cgit v1.2.3-70-g09d2
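
The batch_resize convention introduced by the patch above works as follows: every layer must answer batch_resize(batch_size); stateless layers make it a no-op, layers that cache matrices shaped (batch_size, dim) reallocate them only when the batch size actually changes, and DAGLayer:batch_resize first resizes the connection buffers and then forwards the call to every member layer. The sketch below shows a hypothetical nerv.MyLayer (not part of this patch series) written against that contract, mirroring the CombinerLayer/MSELayer/SoftmaxCELayer implementations above:

require 'nerv'

local MyLayer = nerv.class("nerv.MyLayer", "nerv.Layer")

function MyLayer:init(batch_size)
    -- per-batch scratch buffer: one row per frame in the batch
    self.buf = self.gconf.cumat_type(batch_size, self.dim_in[1])
end

function MyLayer:batch_resize(batch_size)
    -- reallocate only when the batch size really changed, so repeated
    -- calls with an unchanged size stay cheap
    if self.buf:nrow() ~= batch_size then
        self.buf = self.gconf.cumat_type(batch_size, self.dim_in[1])
    end
end
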
From 7975592b94d65b6f356093694a76201de62a7a6a Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Thu, 8 Oct 2015 22:27:58 +0800 Subject: MMI support --- nerv/Makefile | 3 +- nerv/examples/mmi_chime3.lua | 182 ++++++++++++++++++++++++++++++++++++++++++ nerv/examples/mpe_chime3.lua | 185 +++++++++++++++++++++++++++++++++++++++++++ nerv/examples/seq_chime.lua | 185 ------------------------------------------- nerv/layer/init.lua | 1 + nerv/layer/mmi.lua | 50 ++++++++++++ 6 files changed, 420 insertions(+), 186 deletions(-) create mode 100644 nerv/examples/mmi_chime3.lua create mode 100644 nerv/examples/mpe_chime3.lua delete mode 100644 nerv/examples/seq_chime.lua create mode 100644 nerv/layer/mmi.lua diff --git a/nerv/Makefile b/nerv/Makefile index b874a94..ce178a0 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -31,7 +31,8 @@ OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS) LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \ - layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua layer/mpe.lua \ + layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \ + layer/mpe.lua layer/mmi.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua diff --git a/nerv/examples/mmi_chime3.lua b/nerv/examples/mmi_chime3.lua new file mode 100644 index 0000000..a7ad268 --- /dev/null +++ b/nerv/examples/mmi_chime3.lua @@ -0,0 +1,182 @@ +require 'kaldi_io' +gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "ark,o:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_mmi/train.scp ark:- |", + initialized_param = {"/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_init_mmi.nerv", + "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf_mmi.nerv"}, + debug = false} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {440}, dim_out = {440}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {440}, dim_out = {440}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {440}, dim_out = {440}}}, + wlayer2 = {{window = "window2"}, {dim_in = {440}, dim_out = {440}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {440}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {2011}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = 
{2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.MMILayer"] = + { + mmi_crit = {{}, {dim_in = {2011, -1}, dim_out = {1}, + cmd = { + arg = "--class-frame-counts=/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced/ali_train_pdf.counts --acoustic-scale=0.1 --lm-scale=1.0 --learn-rate=0.00001 --drop-frames=true --verbose=1", + mdl = "/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/final.mdl", + lat = "scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_denlats/lat.scp", + ali = "ark:gunzip -c /slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/ali.*.gz |" + } + } + } + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {{}, {dim_in = {2011}, dim_out = {2011}}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {440}, dim_out = {440}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }}, + main = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + mmi_output = {{}, { + dim_in = {440, -1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "mmi_crit[1]", + ["[2]"] = "mmi_crit[2]", + ["mmi_crit[1]"] = "[1]" + } + }}, + softmax_output = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("mmi_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(feature_rspecifier, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = feature_rspecifier, + frm_ext = gconf.frm_ext, + global_transf = layer_repo:get_layer("global_transf"), + mlfs = {} + }) + } + } +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "key"}} +end + +function get_accuracy(layer_repo) + return 0 +end + +function print_stat(layer_repo) + local mmi_crit = layer_repo:get_layer("mmi_crit") + nerv.info("*** training stat begin ***") + nerv.printf("frames:\t\t\t%d\n", mmi_crit.total_frames) + nerv.info("*** training stat end ***") +end diff --git a/nerv/examples/mpe_chime3.lua b/nerv/examples/mpe_chime3.lua new file mode 100644 index 0000000..be723ca --- 
/dev/null +++ b/nerv/examples/mpe_chime3.lua @@ -0,0 +1,185 @@ +require 'kaldi_io' +gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "ark,s,cs:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_smbr/train.scp ark:- |", + initialized_param = {"/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_init.nerv", + "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf.nerv"}, + debug = false} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {440}, dim_out = {440}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {440}, dim_out = {440}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {440}, dim_out = {440}}}, + wlayer2 = {{window = "window2"}, {dim_in = {440}, dim_out = {440}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {440}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {2011}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.MPELayer"] = + { + mpe_crit = {{}, {dim_in = {2011, -1}, dim_out = {1}, + cmd = { + arg = "--class-frame-counts=/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced/ali_train_pdf.counts --acoustic-scale=0.1 --lm-scale=1.0 --learn-rate=0.00001 --do-smbr=true --verbose=1", + mdl = "/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/final.mdl", + lat = "scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_denlats/lat.scp", + ali = "ark:gunzip -c /slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/ali.*.gz |" + } + } + } + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {{}, {dim_in = {2011}, dim_out = {2011}}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {440}, dim_out = {440}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }}, + main = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + 
["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + mpe_output = {{}, { + dim_in = {440, -1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "mpe_crit[1]", + ["[2]"] = "mpe_crit[2]", + ["mpe_crit[1]"] = "[1]" + } + }}, + softmax_output = {{}, { + dim_in = {440}, dim_out = {2011}, + sub_layers = layer_repo, + connections = { + ["[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("mpe_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(feature_rspecifier, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = feature_rspecifier, + frm_ext = gconf.frm_ext, + global_transf = layer_repo:get_layer("global_transf"), + mlfs = {} + }) + } + } +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "key"}} +end + +function get_accuracy(layer_repo) + local mpe_crit = layer_repo:get_layer("mpe_crit") + return mpe_crit.total_correct / mpe_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local mpe_crit = layer_repo:get_layer("mpe_crit") + nerv.info("*** training stat begin ***") + nerv.printf("correct:\t\t%d\n", mpe_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", mpe_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/examples/seq_chime.lua b/nerv/examples/seq_chime.lua deleted file mode 100644 index be723ca..0000000 --- a/nerv/examples/seq_chime.lua +++ /dev/null @@ -1,185 +0,0 @@ -require 'kaldi_io' -gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - frm_ext = 5, - tr_scp = "ark,s,cs:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_smbr/train.scp ark:- |", - initialized_param = {"/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_init.nerv", - "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf.nerv"}, - debug = false} - -function make_layer_repo(param_repo) - local layer_repo = nerv.LayerRepo( - { - -- global transf - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {440}, dim_out = {440}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {440}, dim_out = {440}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {440}, dim_out = {440}}}, - wlayer2 = {{window = "window2"}, {dim_in = {440}, dim_out = {440}}} - }, - -- biased linearity - ["nerv.AffineLayer"] = - { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {440}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", 
bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {2011}}} - }, - ["nerv.SigmoidLayer"] = - { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} - }, - ["nerv.MPELayer"] = - { - mpe_crit = {{}, {dim_in = {2011, -1}, dim_out = {1}, - cmd = { - arg = "--class-frame-counts=/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced/ali_train_pdf.counts --acoustic-scale=0.1 --lm-scale=1.0 --learn-rate=0.00001 --do-smbr=true --verbose=1", - mdl = "/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/final.mdl", - lat = "scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_denlats/lat.scp", - ali = "ark:gunzip -c /slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_ali/ali.*.gz |" - } - } - } - }, - ["nerv.SoftmaxLayer"] = -- softmax for decode output - { - softmax = {{}, {dim_in = {2011}, dim_out = {2011}}} - } - }, param_repo, gconf) - - layer_repo:add_layers( - { - ["nerv.DAGLayer"] = - { - global_transf = {{}, { - dim_in = {440}, dim_out = {440}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }}, - main = {{}, { - dim_in = {440}, dim_out = {2011}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "affine0[1]", - ["affine0[1]"] = "sigmoid0[1]", - ["sigmoid0[1]"] = "affine1[1]", - ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "affine2[1]", - ["affine2[1]"] = "sigmoid2[1]", - ["sigmoid2[1]"] = "affine3[1]", - ["affine3[1]"] = "sigmoid3[1]", - ["sigmoid3[1]"] = "affine4[1]", - ["affine4[1]"] = "sigmoid4[1]", - ["sigmoid4[1]"] = "affine5[1]", - ["affine5[1]"] = "sigmoid5[1]", - ["sigmoid5[1]"] = "affine6[1]", - ["affine6[1]"] = "sigmoid6[1]", - ["sigmoid6[1]"] = "affine7[1]", - ["affine7[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - - layer_repo:add_layers( - { - ["nerv.DAGLayer"] = - { - mpe_output = {{}, { - dim_in = {440, -1}, dim_out = {1}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "main[1]", - ["main[1]"] = "mpe_crit[1]", - ["[2]"] = "mpe_crit[2]", - ["mpe_crit[1]"] = "[1]" - } - }}, - softmax_output = {{}, { - dim_in = {440}, dim_out = {2011}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "main[1]", - ["main[1]"] = "softmax[1]", - ["softmax[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - - return layer_repo -end - -function get_network(layer_repo) - return layer_repo:get_layer("mpe_output") -end - -function get_decode_network(layer_repo) - return layer_repo:get_layer("softmax_output") -end - -function 
get_global_transf(layer_repo) - return layer_repo:get_layer("global_transf") -end - -function make_readers(feature_rspecifier, layer_repo) - return { - {reader = nerv.KaldiReader(gconf, - { - id = "main_scp", - feature_rspecifier = feature_rspecifier, - frm_ext = gconf.frm_ext, - global_transf = layer_repo:get_layer("global_transf"), - mlfs = {} - }) - } - } -end - -function get_input_order() - return {{id = "main_scp", global_transf = true}, - {id = "key"}} -end - -function get_accuracy(layer_repo) - local mpe_crit = layer_repo:get_layer("mpe_crit") - return mpe_crit.total_correct / mpe_crit.total_frames * 100 -end - -function print_stat(layer_repo) - local mpe_crit = layer_repo:get_layer("mpe_crit") - nerv.info("*** training stat begin ***") - nerv.printf("correct:\t\t%d\n", mpe_crit.total_correct) - nerv.printf("frames:\t\t\t%d\n", mpe_crit.total_frames) - nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) - nerv.info("*** training stat end ***") -end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index b74422f..25dfebb 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -80,3 +80,4 @@ nerv.include('combiner.lua') nerv.include('affine_recurrent.lua') nerv.include('softmax.lua') nerv.include('mpe.lua') +nerv.include('mmi.lua') diff --git a/nerv/layer/mmi.lua b/nerv/layer/mmi.lua new file mode 100644 index 0000000..ecc7f48 --- /dev/null +++ b/nerv/layer/mmi.lua @@ -0,0 +1,50 @@ +require 'libkaldiseq' +local MMILayer = nerv.class("nerv.MMILayer", "nerv.Layer") + +function MMILayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.arg = layer_conf.cmd.arg + self.mdl = layer_conf.cmd.mdl + self.lat = layer_conf.cmd.lat + self.ali = layer_conf.cmd.ali + self:check_dim_len(2, -1) -- two inputs: nn output and utt key +end + +function MMILayer:init(batch_size) + self.total_frames = 0 + self.kaldi_mmi = nerv.KaldiMMI(self.arg, self.mdl, self.lat, self.ali) + if self.kaldi_mmi == nil then + nerv.error("kaldi arguments are expected: %s %s %s %s", self.arg, + self.mdl, self.lat, self.ali) + end +end + +function MMILayer:batch_resize(batch_size) + -- do nothing +end + +function MMILayer:update(bp_err, input, output) + -- no params, therefore do nothing +end + +function MMILayer:propagate(input, output) + self.valid = false + self.valid = self.kaldi_mmi:check(input[1], input[2]) + return self.valid +end + +function MMILayer:back_propagate(bp_err, next_bp_err, input, output) + if self.valid ~= true then + nerv.error("kaldi sequence training back_propagate failed") + end + local mmat = input[1]:new_to_host() + next_bp_err[1]:copy_fromh(self.kaldi_mmi:calc_diff(mmat, input[2])) + self.total_frames = self.total_frames + self.kaldi_mmi:get_num_frames() +end + +function MMILayer:get_params() + return nerv.ParamRepo({}) +end -- cgit v1.2.3-70-g09d2
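
As a usage note for the MPELayer/MMILayer contract above: propagate returns the result of the Kaldi lattice/alignment check, and DAGLayer:propagate (patched earlier in this series) forwards the return value of the last layer in its queue, so a sequence trainer can skip the backward pass for utterances Kaldi rejects. A minimal driving loop in that style follows; names such as network, input and err_output follow the trainer code in this series, and this is a sketch, not a verbatim excerpt:

-- one utterance per "batch": resize first, then propagate and check validity
network:batch_resize(input[1]:nrow())
if network:propagate(input, {{}}) == true then
    -- valid utterance: the criterion layer pulls its error signal from Kaldi
    network:back_propagate({{}}, err_output, input, {{}})
    network:update({{}}, input, {{}})
end
-- free GPU buffers between utterances
collectgarbage("collect")
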
From bd6d0d8b72ec656dd8fa0c13aa602f4f6e022391 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 9 Oct 2015 18:42:09 +0800 Subject: enable user to set direct_update in gconf; use direct_update in the example --- nerv/examples/swb_baseline.lua | 1 + nerv/layer/affine.lua | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 8015884..8f72200 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -2,6 +2,7 @@ require 'htk_io' gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, + direct_update = true, frm_ext = 5, frm_trim = 5, tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 00cbcfb..b81b2a4 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -42,7 +42,7 @@ function AffineLayer:__init(id, global_conf, layer_conf) self.dim_out = layer_conf.dim_out self.gconf = global_conf self:check_dim_len(1, 1) -- exactly one input and one output - self.direct_update = layer_conf.direct_update + self.direct_update = layer_conf.direct_update or global_conf.direct_update end function AffineLayer:init(batch_size) -- cgit v1.2.3-70-g09d2 From 60e7718f4db981557ac48d9d375d8e23b1cd39d1 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Sat, 10 Oct 2015 19:06:53 +0800 Subject: disable batch when sequence training --- nerv/examples/seq_trainer.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/nerv/examples/seq_trainer.lua b/nerv/examples/seq_trainer.lua index df96e68..b8ed3eb 100644 --- a/nerv/examples/seq_trainer.lua +++ b/nerv/examples/seq_trainer.lua @@ -49,6 +49,7 @@ function build_trainer(ifname) network:batch_resize(input[1]:nrow()) if network:propagate(input, {{}}) == true then network:back_propagate({{}}, err_output, input, {{}}) + gconf.batch_size = 1.0 - gconf.momentum -- important!!! network:update({{}}, input, {{}}) end -- collect garbage in time to save GPU memory -- cgit v1.2.3-70-g09d2
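
A side note on the defaulting idiom in the direct_update patch above: in Lua, `a or b` yields b whenever a is nil or false, so `layer_conf.direct_update or global_conf.direct_update` lets the global flag win even when a layer sets direct_update = false explicitly; a per-layer opt-out would need a nil test. A hedged sketch of that alternative (not what the patch does):

local function resolve_direct_update(layer_conf, global_conf)
    -- an explicit per-layer true/false wins; only nil falls back
    if layer_conf.direct_update ~= nil then
        return layer_conf.direct_update
    end
    return global_conf.direct_update
end
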
From 473eb9c082224be19f147697ba951ae5bac4b4b4 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Sat, 10 Oct 2015 22:32:51 +0800 Subject: move sequence related layers to kaldi_seq --- nerv/Makefile | 1 - nerv/examples/mmi_chime3.lua | 1 + nerv/examples/mpe_chime3.lua | 1 + nerv/layer/init.lua | 2 -- nerv/layer/mmi.lua | 50 -------------------------------------- nerv/layer/mpe.lua | 52 ---------------------------------------- nerv/lib/matrix/generic/matrix.c | 2 +- 7 files changed, 3 insertions(+), 106 deletions(-) delete mode 100644 nerv/layer/mmi.lua delete mode 100644 nerv/layer/mpe.lua diff --git a/nerv/Makefile b/nerv/Makefile index ce178a0..b449f82 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -32,7 +32,6 @@ LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \ - layer/mpe.lua layer/mmi.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua diff --git a/nerv/examples/mmi_chime3.lua b/nerv/examples/mmi_chime3.lua index a7ad268..6ac7f28 100644 --- a/nerv/examples/mmi_chime3.lua +++ b/nerv/examples/mmi_chime3.lua @@ -1,4 +1,5 @@ require 'kaldi_io' +require 'kaldi_seq' gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, diff --git a/nerv/examples/mpe_chime3.lua b/nerv/examples/mpe_chime3.lua index be723ca..ec095b0 100644 --- a/nerv/examples/mpe_chime3.lua +++ b/nerv/examples/mpe_chime3.lua @@ -1,4 +1,5 @@ require 'kaldi_io' +require 'kaldi_seq' gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 25dfebb..6861b0e 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -79,5 +79,3 @@ nerv.include('mse.lua') nerv.include('combiner.lua') nerv.include('affine_recurrent.lua') nerv.include('softmax.lua') -nerv.include('mpe.lua') -nerv.include('mmi.lua') diff --git a/nerv/layer/mmi.lua b/nerv/layer/mmi.lua deleted file mode 100644 index ecc7f48..0000000 --- a/nerv/layer/mmi.lua +++ /dev/null @@ -1,50 +0,0 @@ -require 'libkaldiseq' -local MMILayer = nerv.class("nerv.MMILayer", "nerv.Layer") - -function MMILayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.arg = layer_conf.cmd.arg - self.mdl = layer_conf.cmd.mdl - self.lat = layer_conf.cmd.lat - self.ali = layer_conf.cmd.ali - self:check_dim_len(2, -1) -- two inputs: nn output and utt key -end - -function MMILayer:init(batch_size) - self.total_frames = 0 - self.kaldi_mmi = nerv.KaldiMMI(self.arg, self.mdl, self.lat, self.ali) - if self.kaldi_mmi == nil then - nerv.error("kaldi arguments are expected: %s %s %s %s", self.arg, - self.mdl, self.lat, self.ali) - end -end - -function MMILayer:batch_resize(batch_size) - -- do nothing -end - -function MMILayer:update(bp_err, input, output) - -- no params, therefore do nothing -end - -function MMILayer:propagate(input, output) - self.valid = false - self.valid = self.kaldi_mmi:check(input[1], input[2]) - return self.valid -end - -function MMILayer:back_propagate(bp_err, next_bp_err, input, output) - if self.valid ~= true then - nerv.error("kaldi sequence training back_propagate failed") - end - local mmat = input[1]:new_to_host() - next_bp_err[1]:copy_fromh(self.kaldi_mmi:calc_diff(mmat, input[2])) - self.total_frames = self.total_frames + self.kaldi_mmi:get_num_frames() -end - -function MMILayer:get_params() - return nerv.ParamRepo({}) -end diff --git a/nerv/layer/mpe.lua b/nerv/layer/mpe.lua deleted file mode 100644 index ec8a8f3..0000000 --- a/nerv/layer/mpe.lua +++ /dev/null @@ -1,52 +0,0 @@ -require 'libkaldiseq' -local MPELayer = nerv.class("nerv.MPELayer", "nerv.Layer") - -function MPELayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.arg = layer_conf.cmd.arg - self.mdl = layer_conf.cmd.mdl - self.lat = layer_conf.cmd.lat - self.ali = layer_conf.cmd.ali - self:check_dim_len(2, -1) -- two inputs: nn output and utt key -end - -function MPELayer:init(batch_size) - self.total_correct = 0 - self.total_frames = 0 - self.kaldi_mpe = nerv.KaldiMPE(self.arg, self.mdl, self.lat, self.ali) - if self.kaldi_mpe == nil then - nerv.error("kaldi arguments are expected: %s %s %s %s", self.arg, - self.mdl, self.lat, self.ali) - end -end - -function MPELayer:batch_resize(batch_size) - -- do nothing -end - -function MPELayer:update(bp_err, input, output) - -- no params, therefore do nothing -end - -function MPELayer:propagate(input, output) - self.valid = false - self.valid = self.kaldi_mpe:check(input[1], input[2]) - return self.valid -end - -function MPELayer:back_propagate(bp_err, next_bp_err, input, output) - if self.valid ~= true then - nerv.error("kaldi sequence training back_propagate failed") - end - local mmat = input[1]:new_to_host() - next_bp_err[1]:copy_fromh(self.kaldi_mpe:calc_diff(mmat, input[2])) - self.total_frames = self.total_frames + self.kaldi_mpe:get_num_frames() - self.total_correct = self.total_correct + self.kaldi_mpe:get_utt_frame_acc() -end - -function MPELayer:get_params() - return nerv.ParamRepo({}) -end diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c index 4319e13..4246751 100644 --- 
a/nerv/lib/matrix/generic/matrix.c +++ b/nerv/lib/matrix/generic/matrix.c @@ -4,7 +4,7 @@ /* FIXME: malloc failure detection */ void nerv_matrix_(data_free)(Matrix *self, Status *status) { - if(*self->data_ref == 0) return; + if(*self->data_ref == 0) return; /* FIXME: repeated free of memory */ assert(*self->data_ref > 0); if (--(*self->data_ref) == 0) { -- cgit v1.2.3-70-g09d2
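
The guard added to nerv_matrix_(data_free) above turns a second free of the same storage into a no-op instead of an assertion failure. Below is a pure-Lua model of that reference-counting logic, written only to illustrate the control flow; the real implementation is the C code in nerv/lib/matrix/generic/matrix.c:

local function data_free(m)
    -- storage already released: a repeated free becomes a no-op
    if m.data_ref == 0 then
        return
    end
    assert(m.data_ref > 0)
    m.data_ref = m.data_ref - 1
    if m.data_ref == 0 then
        -- last owner gone: release the underlying storage
        m.data = nil
    end
end

-- two owners sharing one buffer: only the second free releases it
local mat = {data = "buffer", data_ref = 2}
data_free(mat)
data_free(mat)
data_free(mat) -- third call is safely ignored by the guard
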