From a32195e3e2ae9ca0f0c7a82e73e6bddb64568c05 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 10 Mar 2016 13:40:11 +0800 Subject: major change: clearer param binding semantics; permit rebinding; enable resuming from previous training --- nerv/examples/asr_trainer.lua | 183 +++++++++++++++++++++++------------ nerv/examples/swb_baseline.lua | 77 ++++++++------- nerv/examples/swb_baseline2.lua | 77 ++++++++------- nerv/examples/swb_baseline_basic.lua | 162 ------------------------------- nerv/examples/timit_baseline2.lua | 64 ++++++------ nerv/init.lua | 75 +++++++------- nerv/layer/affine.lua | 43 ++++---- nerv/layer/bias.lua | 15 +-- nerv/layer/combiner.lua | 16 ++- nerv/layer/dropout.lua | 16 ++- nerv/layer/elem_mul.lua | 11 ++- nerv/layer/gru.lua | 20 ++-- nerv/layer/init.lua | 60 ++++++++---- nerv/layer/lstm.lua | 20 ++-- nerv/layer/lstm_gate.lua | 17 ++-- nerv/layer/mse.lua | 16 ++- nerv/layer/sigmoid.lua | 11 ++- nerv/layer/softmax.lua | 11 ++- nerv/layer/softmax_ce.lua | 16 ++- nerv/layer/tanh.lua | 11 ++- nerv/layer/window.lua | 15 +-- nerv/matrix/init.lua | 21 +++- nerv/nerv | 4 + nerv/nn/layer_dag.lua | 16 ++- nerv/nn/layer_repo.lua | 30 +++--- nerv/nn/param_repo.lua | 59 ++++++++++- 26 files changed, 526 insertions(+), 540 deletions(-) delete mode 100644 nerv/examples/swb_baseline_basic.lua diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 5001e12..5bf28bd 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -1,19 +1,33 @@ require 'lfs' require 'pl' local function build_trainer(ifname) - local param_repo = nerv.ParamRepo() - param_repo:import(ifname, nil, gconf) - local layer_repo = make_layer_repo(param_repo) - local network = get_network(layer_repo) - local global_transf = get_global_transf(layer_repo) - local input_order = get_input_order() + local host_param_repo = nerv.ParamRepo() local mat_type + local src_loc_type + local train_loc_type + host_param_repo:import(ifname, nil, gconf) if gconf.use_cpu then mat_type = gconf.mmat_type + src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST else mat_type = gconf.cumat_type + src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE end - local iterative_trainer = function (prefix, scp_file, bp) + local param_repo = host_param_repo:copy(train_loc_type) + local layer_repo = make_layer_repo(param_repo) + local network = get_network(layer_repo) + local global_transf = get_global_transf(layer_repo) + local input_order = get_input_order() + local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo) + -- rebind the params if necessary + if rebind_param_repo then + host_param_repo = rebind_param_repo + param_repo = host_param_repo:copy(train_loc_type) + layer_repo:rebind(param_repo) + rebind_param_repo = nil + end gconf.randomize = bp -- build buffer local buffer = make_buffer(make_readers(scp_file, layer_repo)) @@ -66,20 +80,38 @@ local function build_trainer(ifname) print_stat(layer_repo) mat_type.print_profile() mat_type.clear_profile() - if (not bp) and prefix ~= nil then - nerv.info("writing back...") - local fname = string.format("%s_cv%.3f.nerv", - prefix, get_accuracy(layer_repo)) - network:get_params():export(fname, nil) + local fname + if (not bp) then + host_param_repo = param_repo:copy(src_loc_type) + if prefix ~= nil then + nerv.info("writing back...") + fname = string.format("%s_cv%.3f.nerv", + prefix, get_accuracy(layer_repo)) + host_param_repo:export(fname, nil) + end end - return get_accuracy(layer_repo) + return get_accuracy(layer_repo), host_param_repo, fname end return iterative_trainer end -local function check_and_add_defaults(spec) - for k, v in pairs(spec) do - gconf[k] = opts[string.gsub(k, '_', '-')].val or gconf[k] or v +local function check_and_add_defaults(spec, opts) + local function get_opt_val(k) + return opts[string.gsub(k, '_', '-')].val + end + local opt_v = get_opt_val("resume_from") + if opt_v then + gconf = dofile(opt_v) + else + for k, v in pairs(spec) do + local opt_v = get_opt_val(k) + if opt_v ~= nil then + gconf[k] = opt_v + elseif gconf[k] ~= nil then + elseif v ~= nil then + gconf[k] = v + end + end end end @@ -112,6 +144,13 @@ local function print_gconf() end end +local function dump_gconf(fname) + local f = io.open(fname, "w") + f:write("return ") + f:write(table.tostring(gconf)) + f:close() +end + local trainer_defaults = { lrate = 0.8, batch_size = 256, @@ -121,22 +160,26 @@ local trainer_defaults = { start_halving_inc = 0.5, halving_factor = 0.6, end_halving_inc = 0.1, + cur_iter = 1, min_iter = 1, max_iter = 20, min_halving = 5, do_halving = false, - tr_scp = nil, - cv_scp = nil, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - debug = false + cumat_tname = "nerv.CuMatrixFloat", + mmat_tname = "nerv.MMatrixFloat", + debug = false, } local options = make_options(trainer_defaults) -table.insert(options, {"help", "h", "boolean", - default = false, desc = "show this help information"}) -table.insert(options, {"dir", nil, "string", - default = nil, desc = "specify the working directory"}) +local extra_opt_spec = { + {"tr-scp", nil, "string"}, + {"cv-scp", nil, "string"}, + {"resume-from", nil, "string"}, + {"help", "h", "boolean", default = false, desc = "show this help information"}, + {"dir", nil, "string", desc = "specify the working directory"}, +} + +table.extend(options, extra_opt_spec) arg, opts = nerv.parse_args(arg, options) @@ -155,14 +198,16 @@ Note: config key like aaa_bbbb_cc could be overriden by specifying ]]-- -check_and_add_defaults(trainer_defaults) +check_and_add_defaults(trainer_defaults, opts) +gconf.mmat_type = nerv.get_type(gconf.mmat_tname) +gconf.cumat_type = nerv.get_type(gconf.cumat_tname) +gconf.use_cpu = econf.use_cpu or false local pf0 = gconf.initialized_param -local trainer = build_trainer(pf0) -local accu_best = trainer(nil, gconf.cv_scp, false) local date_pattern = "%Y%m%d%H%M%S" local logfile_name = "log" local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern)) +local rebind_param_repo = nil print_gconf() if not lfs.mkdir(working_dir) then @@ -173,37 +218,55 @@ dir.copyfile(arg[1], working_dir) -- set logfile path nerv.set_logfile(path.join(working_dir, logfile_name)) path.chdir(working_dir) -nerv.info("initial cross validation: %.3f", accu_best) -for i = 1, gconf.max_iter do - nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) - local accu_tr = trainer(nil, gconf.tr_scp, true) - nerv.info("[TR] training set %d: %.3f", i, accu_tr) - local accu_new = trainer( - string.format("%s_%s_iter_%d_lr%f_tr%.3f", - string.gsub( - (string.gsub(pf0[1], "(.*/)(.*)", "%2")), - "(.*)%..*", "%1"), - os.date(date_pattern), - i, gconf.lrate, - accu_tr), - gconf.cv_scp, false) - nerv.info("[CV] cross validation %d: %.3f", i, accu_new) - -- TODO: revert the weights - local accu_diff = accu_new - accu_best - if gconf.do_halving and - accu_diff < gconf.end_halving_inc and - i > gconf.min_iter then - break - end - if accu_diff < gconf.start_halving_inc and - i >= gconf.min_halving then - gconf.do_halving = true - end - if gconf.do_halving then - gconf.lrate = gconf.lrate * gconf.halving_factor - end - if accu_new > accu_best then - accu_best = accu_new - end + +-- start the training +local trainer = build_trainer(pf0) +local pr_prev +gconf.accu_best, pr_prev = trainer(nil, gconf.cv_scp, false) +nerv.info("initial cross validation: %.3f", gconf.accu_best) +for i = gconf.cur_iter, gconf.max_iter do + local stop = false + gconf.cur_iter = i + dump_gconf(string.format("iter_%d.meta", i)) + repeat -- trick to implement `continue` statement + nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) + local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo) + nerv.info("[TR] training set %d: %.3f", i, accu_tr) + local param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f", + string.gsub( + (string.gsub(pf0[1], "(.*/)(.*)", "%2")), + "(.*)%..*", "%1"), + os.date(date_pattern), + i, gconf.lrate, + accu_tr) + local accu_new, pr_new, param_fname = trainer(param_prefix, gconf.cv_scp, false) + nerv.info("[CV] cross validation %d: %.3f", i, accu_new) + local accu_prev = gconf.accu_best + if accu_new < gconf.accu_best then + nerv.info("rejecting the trained params, rollback to the previous one") + file.move(param_fname, param_fname .. ".rejected") + rebind_param_repo = pr_prev + break -- `continue` equivalent + else + nerv.info("accepting the trained params") + gconf.accu_best = accu_new + pr_prev = pr_new + gconf.initialized_param = {path.join(path.currentdir(), param_fname)} + end + if gconf.do_halving and + gconf.accu_best - accu_prev < gconf.end_halving_inc and + i > gconf.min_iter then + stop = true + break + end + if gconf.accu_best - accu_prev < gconf.start_halving_inc and + i >= gconf.min_halving then + gconf.do_halving = true + end + if gconf.do_halving then + gconf.lrate = gconf.lrate * gconf.halving_factor + end + until true + if stop then break end -- nerv.Matrix.print_profile() end diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 4cb2389..0ce8468 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -7,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", - "/slfs1/users/mfy43/swb_global_transf.nerv"}, - debug = false} + "/slfs1/users/mfy43/swb_global_transf.nerv"}} function make_layer_repo(param_repo) local layer_repo = nerv.LayerRepo( @@ -16,51 +15,51 @@ function make_layer_repo(param_repo) -- global transf ["nerv.BiasLayer"] = { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}}, + blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}} }, ["nerv.WindowLayer"] = { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}}, + wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}} }, -- biased linearity ["nerv.AffineLayer"] = { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} + affine0 = {dim_in = {429}, dim_out = {2048}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}}, + affine7 = {dim_in = {2048}, dim_out = {3001}, + params = {ltp = "affine7_ltp", bp = "affine7_bp"}} }, ["nerv.SigmoidLayer"] = { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + sigmoid0 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid1 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid2 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid3 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid4 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid5 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid6 = {dim_in = {2048}, dim_out = {2048}} }, ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output { - ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true} }, ["nerv.SoftmaxLayer"] = -- softmax for decode output { - softmax = {{}, {dim_in = {3001}, dim_out = {3001}}} + softmax = {dim_in = {3001}, dim_out = {3001}} } }, param_repo, gconf) @@ -68,7 +67,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - global_transf = {{}, { + global_transf = { dim_in = {429}, dim_out = {429}, sub_layers = layer_repo, connections = { @@ -78,8 +77,8 @@ function make_layer_repo(param_repo) ["blayer2[1]"] = "wlayer2[1]", ["wlayer2[1]"] = "[1]" } - }}, - main = {{}, { + }, + main = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -100,7 +99,7 @@ function make_layer_repo(param_repo) ["sigmoid6[1]"] = "affine7[1]", ["affine7[1]"] = "[1]" } - }} + } } }, param_repo, gconf) @@ -108,7 +107,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - ce_output = {{}, { + ce_output = { dim_in = {429, 1}, dim_out = {1}, sub_layers = layer_repo, connections = { @@ -117,8 +116,8 @@ function make_layer_repo(param_repo) ["[2]"] = "ce_crit[2]", ["ce_crit[1]"] = "[1]" } - }}, - softmax_output = {{}, { + }, + softmax_output = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -126,7 +125,7 @@ function make_layer_repo(param_repo) ["main[1]"] = "softmax[1]", ["softmax[1]"] = "[1]" } - }} + } } }, param_repo, gconf) diff --git a/nerv/examples/swb_baseline2.lua b/nerv/examples/swb_baseline2.lua index b0b9689..8b5ebb1 100644 --- a/nerv/examples/swb_baseline2.lua +++ b/nerv/examples/swb_baseline2.lua @@ -7,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cv_scp = "/speechlab/users/mfy43/swb50/train_cv.scp", htk_conf = "/speechlab/users/mfy43/swb50/plp_0_d_a.conf", initialized_param = {"/speechlab/users/mfy43/swb50/swb_init.nerv", - "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"}, - debug = false} + "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"}} function make_layer_repo(param_repo) local layer_repo = nerv.LayerRepo( @@ -16,51 +15,51 @@ function make_layer_repo(param_repo) -- global transf ["nerv.BiasLayer"] = { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}}, + blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}} }, ["nerv.WindowLayer"] = { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}}, + wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}} }, -- biased linearity ["nerv.AffineLayer"] = { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} + affine0 = {dim_in = {429}, dim_out = {2048}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}}, + affine7 = {dim_in = {2048}, dim_out = {3001}, + params = {ltp = "affine7_ltp", bp = "affine7_bp"}} }, ["nerv.SigmoidLayer"] = { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + sigmoid0 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid1 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid2 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid3 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid4 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid5 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid6 = {dim_in = {2048}, dim_out = {2048}} }, ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output { - ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true} }, ["nerv.SoftmaxLayer"] = -- softmax for decode output { - softmax = {{}, {dim_in = {3001}, dim_out = {3001}}} + softmax = {dim_in = {3001}, dim_out = {3001}} } }, param_repo, gconf) @@ -68,7 +67,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - global_transf = {{}, { + global_transf = { dim_in = {429}, dim_out = {429}, sub_layers = layer_repo, connections = { @@ -78,8 +77,8 @@ function make_layer_repo(param_repo) ["blayer2[1]"] = "wlayer2[1]", ["wlayer2[1]"] = "[1]" } - }}, - main = {{}, { + }, + main = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -100,7 +99,7 @@ function make_layer_repo(param_repo) ["sigmoid6[1]"] = "affine7[1]", ["affine7[1]"] = "[1]" } - }} + } } }, param_repo, gconf) @@ -108,7 +107,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - ce_output = {{}, { + ce_output = { dim_in = {429, 1}, dim_out = {1}, sub_layers = layer_repo, connections = { @@ -117,8 +116,8 @@ function make_layer_repo(param_repo) ["[2]"] = "ce_crit[2]", ["ce_crit[1]"] = "[1]" } - }}, - softmax_output = {{}, { + }, + softmax_output = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -126,7 +125,7 @@ function make_layer_repo(param_repo) ["main[1]"] = "softmax[1]", ["softmax[1]"] = "[1]" } - }} + } } }, param_repo, gconf) diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua deleted file mode 100644 index 71f04a3..0000000 --- a/nerv/examples/swb_baseline_basic.lua +++ /dev/null @@ -1,162 +0,0 @@ -require 'htk_io' -gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - frm_ext = 5, - frm_trim = 5, - tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", - htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", - "/slfs1/users/mfy43/swb_global_transf.nerv"}, - debug = false} - -function make_layer_repo(param_repo) - local layer_repo = nerv.LayerRepo( - { - -- global transf - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - }, - -- biased linearity - ["nerv.AffineLayer"] = - { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} - }, - ["nerv.SigmoidLayer"] = - { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} - }, - ["nerv.SoftmaxCELayer"] = - { - ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} - } - }, param_repo, gconf) - - layer_repo:add_layers( - { - ["nerv.DAGLayer"] = - { - global_transf = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }}, - main = {{}, { - dim_in = {429, 1}, dim_out = {1}, - sub_layers = layer_repo, - connections = { - ["[1]"] = "affine0[1]", - ["affine0[1]"] = "sigmoid0[1]", - ["sigmoid0[1]"] = "affine1[1]", - ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "affine2[1]", - ["affine2[1]"] = "sigmoid2[1]", - ["sigmoid2[1]"] = "affine3[1]", - ["affine3[1]"] = "sigmoid3[1]", - ["sigmoid3[1]"] = "affine4[1]", - ["affine4[1]"] = "sigmoid4[1]", - ["sigmoid4[1]"] = "affine5[1]", - ["affine5[1]"] = "sigmoid5[1]", - ["sigmoid5[1]"] = "affine6[1]", - ["affine6[1]"] = "sigmoid6[1]", - ["sigmoid6[1]"] = "affine7[1]", - ["affine7[1]"] = "ce_crit[1]", - ["[2]"] = "ce_crit[2]", - ["ce_crit[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - return layer_repo -end - -function get_network(layer_repo) - return layer_repo:get_layer("main") -end - -function make_readers(scp_file, layer_repo) - return { - {reader = nerv.TNetReader(gconf, - { - id = "main_scp", - scp_file = scp_file, - conf_file = gconf.htk_conf, - frm_ext = gconf.frm_ext, - mlfs = { - phone_state = { - file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", - format = "map", - format_arg = "/slfs1/users/mfy43/swb_ivec/dict", - dir = "*/", - ext = "lab" - } - } - }), - data = {main_scp = 429, phone_state = 1}} - } -end - -function make_buffer(readers) - return nerv.SGDBuffer(gconf, - { - buffer_size = gconf.buffer_size, - randomize = gconf.randomize, - readers = readers - }) -end - -function get_input_order() - return {{id = "main_scp", global_transf = true}, - {id = "phone_state"}} -end - -function get_accuracy(layer_repo) - local ce_crit = layer_repo:get_layer("ce_crit") - return ce_crit.total_correct / ce_crit.total_frames * 100 -end - -function print_stat(layer_repo) - local ce_crit = layer_repo:get_layer("ce_crit") - nerv.info("*** training stat begin ***") - nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) - nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) - nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) - nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) - nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) - nerv.info("*** training stat end ***") -end diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua index 103d89d..2d144b5 100644 --- a/nerv/examples/timit_baseline2.lua +++ b/nerv/examples/timit_baseline2.lua @@ -16,46 +16,46 @@ function make_layer_repo(param_repo) -- global transf ["nerv.BiasLayer"] = { - blayer1 = {{bias = "bias0"}, {dim_in = {440}, dim_out = {440}}} + blayer1 = {dim_in = {440}, dim_out = {440}, params = {bias = "bias0"}} }, ["nerv.WindowLayer"] = { - wlayer1 = {{window = "window0"}, {dim_in = {440}, dim_out = {440}}} + wlayer1 = {dim_in = {440}, dim_out = {440}, params = {window = "window0"}} }, -- biased linearity ["nerv.AffineLayer"] = { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {440}, dim_out = {1024}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {1024}, dim_out = {1024}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {1024}, dim_out = {1024}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {1024}, dim_out = {1024}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {1024}, dim_out = {1024}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {1024}, dim_out = {1024}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {1024}, dim_out = {1959}}} + affine0 = {dim_in = {440}, dim_out = {1024}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {1024}, dim_out = {1959}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}} }, ["nerv.SigmoidLayer"] = { - sigmoid0 = {{}, {dim_in = {1024}, dim_out = {1024}}}, - sigmoid1 = {{}, {dim_in = {1024}, dim_out = {1024}}}, - sigmoid2 = {{}, {dim_in = {1024}, dim_out = {1024}}}, - sigmoid3 = {{}, {dim_in = {1024}, dim_out = {1024}}}, - sigmoid4 = {{}, {dim_in = {1024}, dim_out = {1024}}}, - sigmoid5 = {{}, {dim_in = {1024}, dim_out = {1024}}} + sigmoid0 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid1 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid2 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid3 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid4 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid5 = {dim_in = {1024}, dim_out = {1024}} }, ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output { - ce_crit = {{}, {dim_in = {1959, 1}, dim_out = {1}, compressed = true}} + ce_crit = {dim_in = {1959, 1}, dim_out = {1}, compressed = true} }, ["nerv.SoftmaxLayer"] = -- softmax for decode output { - softmax = {{}, {dim_in = {1959}, dim_out = {1959}}} + softmax = {dim_in = {1959}, dim_out = {1959}} } }, param_repo, gconf) @@ -63,7 +63,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - global_transf = {{}, { + global_transf = { dim_in = {440}, dim_out = {440}, sub_layers = layer_repo, connections = { @@ -71,8 +71,8 @@ function make_layer_repo(param_repo) ["blayer1[1]"] = "wlayer1[1]", ["wlayer1[1]"] = "[1]" } - }}, - main = {{}, { + }, + main = { dim_in = {440}, dim_out = {1959}, sub_layers = layer_repo, connections = { @@ -91,7 +91,7 @@ function make_layer_repo(param_repo) ["sigmoid5[1]"] = "affine6[1]", ["affine6[1]"] = "[1]" } - }} + } } }, param_repo, gconf) @@ -99,7 +99,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - ce_output = {{}, { + ce_output = { dim_in = {440, 1}, dim_out = {1}, sub_layers = layer_repo, connections = { @@ -108,8 +108,8 @@ function make_layer_repo(param_repo) ["[2]"] = "ce_crit[2]", ["ce_crit[1]"] = "[1]" } - }}, - softmax_output = {{}, { + }, + softmax_output = { dim_in = {440}, dim_out = {1959}, sub_layers = layer_repo, connections = { @@ -117,7 +117,7 @@ function make_layer_repo(param_repo) ["main[1]"] = "softmax[1]", ["softmax[1]"] = "[1]" } - }} + } } }, param_repo, gconf) diff --git a/nerv/init.lua b/nerv/init.lua index 4d7b687..da7df29 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -98,24 +98,27 @@ function nerv.class(tname, parenttname) end function table.val_to_str(v) - if "string" == type(v) then - v = string.gsub(v, "\n", "\\n") - if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then - return "'" .. v .. "'" + if "string" == type(v) then + v = string.gsub(v, "\n", "\\n") + if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then + return "'" .. v .. "'" + end + return '"' .. string.gsub(v,'"', '\\"') .. '"' + else + return "table" == type(v) and table.tostring(v) or + (("number" == type(v) or + "string" == type(v) or + "boolean" == type(v)) and tostring(v)) or + nil -- failed to serialize end - return '"' .. string.gsub(v,'"', '\\"') .. '"' - else - return "table" == type(v) and table.tostring(v) or - tostring(v) - end end function table.key_to_str (k) - if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then - return k - else - return "[" .. table.val_to_str(k) .. "]" - end + if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then + return k + else + return "[" .. table.val_to_str(k) .. "]" + end end --- Get the string representation of a table, which can be executed as a valid @@ -124,18 +127,18 @@ end -- @return the string representation which will result in a Lua table entity -- when evaluated function table.tostring(tbl) - local result, done = {}, {} - for k, v in ipairs(tbl) do - table.insert(result, table.val_to_str(v)) - done[k] = true - end - for k, v in pairs(tbl) do - if not done[k] then - table.insert(result, - table.key_to_str(k) .. "=" .. table.val_to_str(v)) + local result, done = {}, {} + for k, v in ipairs(tbl) do + table.insert(result, table.val_to_str(v)) + done[k] = true end - end - return "{" .. table.concat(result, ",") .. "}" + for k, v in pairs(tbl) do + if not done[k] then + table.insert(result, + table.key_to_str(k) .. "=" .. table.val_to_str(v)) + end + end + return "{" .. table.concat(result, ",") .. "}" end --- Get the class by name. @@ -332,27 +335,17 @@ function nerv.print_usage(options) (opt_full and '--' .. opt_full) or "", (opt_short and '-' .. opt_short) or "", opt_type, - v.default or "", + (v.default ~= nil and tostring(v.default)) or "", v.desc or "") end nerv.printf("\n") end --- function nerv.copy_file(fname1, fname2) --- local fin, fout, err --- fin, err = io.open(fname1, "r") --- if fin then --- fout, err = io.open(fname2, "w") --- end --- if not (fin and fout) then --- nerv.error("[copy] from %s to %s: %s", fname1, fname2, err) --- end --- while true do --- local b = fin:read(1024) --- if b == nil then break end --- fout:write(b) --- end --- end +function table.extend(tbl1, tbl2) + for _, v in ipairs(tbl2) do + table.insert(tbl1, v) + end +end -- the following lines trigger the initialization of basic modules diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 4156dde..38743aa 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') --- A parameter that consists of a single matrix -- @type nerv.MatrixParam +function MatrixParam:check(checker) + -- check trans matrix type + checker(self.trans) +end + --- Read from a file handle. -- @param handle the file handle function MatrixParam:read(handle) self.trans = self.gconf.mmat_type.load(handle) - if not self.gconf.use_cpu then - self.trans = self.gconf.cumat_type.new_from_host(self.trans) - end end function MatrixParam:write(handle) - local trans = self.trans - if not self.gconf.use_cpu then - trans = self.trans:new_to_host() - end - trans:save(handle) + self.trans:save(handle) end function MatrixParam:train_init() @@ -30,6 +28,12 @@ function MatrixParam:train_init() self.correction:fill(0) end +function MatrixParam:copy(copier) + local target = nerv.MatrixParam(self.id, self.gconf) + target.trans = copier(self.trans) + return target +end + function MatrixParam:_update_by_gradient(gradient, alpha, beta) local gconf = self.gconf -- momentum gain @@ -77,25 +81,24 @@ end --- The constructor. function AffineLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then - layer_conf.ltp1 = layer_conf.ltp - end + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs + self:bind_params() +end + +function AffineLayer:bind_params() for i = 1, #self.dim_in do local pid = "ltp" .. i local pid_list = i == 1 and {pid, "ltp"} or pid - self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf, + self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf, nerv.LinearTransParam, - {self.dim_in[i], self.dim_out[1]}) + {self.dim_in[i], self.dim_out[1]}) end self.ltp = self.ltp1 -- alias of ltp1 - self.bp = self:find_param("bp", layer_conf, global_conf, + self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam, {1, self.dim_out[1]}) - self.gconf = global_conf - self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs + end function AffineLayer:init(batch_size) @@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output) end function AffineLayer:get_params() - local pr = nerv.ParamRepo({self.ltp1, self.bp}) + local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type) for i = 2, #self.dim_in do pr:add(self["ltp" .. i].id, self["ltp" .. i]) end diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index 924c3da..191be78 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -1,12 +1,15 @@ local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer") function BiasLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.bias = layer_conf.bias - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + self:bind_params() +end + +function BiasLayer:bind_params() + self.bias = self:find_param("bias", self.lconf, self.gconf, + nerv.BiasParam, + {1, self.dim_out[1]}) end function BiasLayer:init() @@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output) end function BiasLayer:get_params() - return nerv.ParamRepo({self.bias}) + return nerv.ParamRepo({self.bias}, self.loc_type) end diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua index 22e89a9..028c970 100644 --- a/nerv/layer/combiner.lua +++ b/nerv/layer/combiner.lua @@ -1,16 +1,8 @@ local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer') function CombinerLayer:__init(id, global_conf, layer_conf) - self.id = id + nerv.Layer.__init(self, id, global_conf, layer_conf) self.lambda = layer_conf.lambda - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end self:check_dim_len(#self.lambda, -1) if #self.dim_in < 1 then nerv.error("no input specified") @@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf) end end +function CombinerLayer:bind_params() + -- do nothing +end + function CombinerLayer:init(batch_size) local dim = self.dim_in[1] for i = 2, #self.dim_in do @@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output) end function CombinerLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua index 42660cc..1a379c9 100644 --- a/nerv/layer/dropout.lua +++ b/nerv/layer/dropout.lua @@ -1,22 +1,18 @@ local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer") function DropoutLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end + nerv.Layer.__init(self, id, global_conf, layer_conf) self.rate = layer_conf.dropout_rate or global_conf.dropout_rate if self.rate == nil then nerv.warning("[DropoutLayer:propagate] dropout rate is not set") end - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out self:check_dim_len(1, 1) -- two inputs: nn output and label end +function DropoutLayer:bind_params() + -- do nothing +end + function DropoutLayer:init(batch_size, chunk_size) if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t) end function DropoutLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua index fe80a3f..f03649b 100644 --- a/nerv/layer/elem_mul.lua +++ b/nerv/layer/elem_mul.lua @@ -1,14 +1,15 @@ local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer') function ElemMulLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) -- element-wise multiplication of input[1] and input[2] self:check_dim_len(2, 1) end +function ElemMulLayer:bind_params() + -- do nothing +end + function ElemMulLayer:init(batch_size) if self.dim_in[1] ~= self.dim_in[2] or self.dim_in[1] ~= self.dim_out[1] then @@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output) end function ElemMulLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua index e81d21a..a590a67 100644 --- a/nerv/layer/gru.lua +++ b/nerv/layer/gru.lua @@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf) -- input1:x -- input2:h -- input3:c (h^~) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - + nerv.Layer.__init(self, id, global_conf, layer_conf) if self.dim_in[2] ~= self.dim_out[1] then nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1]) @@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo() + pr = nerv.ParamRepo(nil, self.loc_type) end local function ap(str) @@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf) }, } - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + self.lrepo = nerv.LayerRepo(layers, pr, global_conf) local connections = { ["[1]"] = ap("inputXDup[1]"), @@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf) self.dag = nerv.DAGLayer(self.id, global_conf, {dim_in = self.dim_in, dim_out = self.dim_out, - sub_layers = layerRepo, + sub_layers = self.lrepo, connections = connections}) self:check_dim_len(2, 1) -- x, h and h end +function GRULayer:bind_params() + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo(nil, self.loc_type) + end + self.lrepo:rebind(pr) +end + function GRULayer:init(batch_size, chunk_size) self.dag:init(batch_size, chunk_size) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 54f33ae..146ad8c 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -30,7 +30,18 @@ end local Layer = nerv.class('nerv.Layer') function Layer:__init(id, global_conf, layer_conf) - nerv.error_method_not_implemented() + self.id = id + self.gconf = global_conf + self.lconf = layer_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + else + self.mat_type = self.gconf.cumat_type + self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE + end + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out end function Layer:init(batch_size) @@ -66,34 +77,41 @@ function Layer:get_params() nerv.error_method_not_implemented() end +function Layer:bind_params() + nerv.error_method_not_implemented() +end + function Layer:get_dim() return self.dim_in, self.dim_out end -function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) - if type(pid_list) == "string" then - pid_list = {pid_list} +function Layer:find_param(plist, lconf, gconf, p_type, p_dim) + if type(plist) == "string" then + plist = {plist} end - pid_list_str = table.tostring(pid_list) - for i, pid in ipairs(pid_list) do - if lconf[pid] ~= nil then - nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id) - return lconf[pid] + if lconf.params == nil then + lconf.params = {} + end + plist_str = table.tostring(plist) + local pid + for i, pname in ipairs(plist) do + if lconf.params[pname] ~= nil then + nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id) + pid = lconf.params[pname] end - local pid_g = self.id .. '_' .. pid --global identifier - local pr = lconf.pr - local p - if pr ~= nil and pr:has_param(pid_g) == true then - nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id) - p = pr:get_param(pid_g) - return p + if lconf.pr:has_param(pid) then + return lconf.pr:get_param(pid) end end - nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " .. - "switch to auto-generate", pid_list_str, self.id) - local pid_g = self.id .. '_' .. pid_list[1] - p = p_type(pid_g, gconf) - p.trans = gconf.cumat_type(unpack(p_dim)) + pid = self.id .. '_' .. plist[1] + if lconf.pr:has_param(pid) then + nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id) + return lconf.pr:get_param(pid) + end + nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " .. + "switch to auto-generate", plist_str, self.id) + local p = p_type(pid, gconf) + p.trans = self.mat_type(unpack(p_dim)) if type(gconf.param_random) ~= "function" then nerv.error("a param generate function is needed") end diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua index 500bd87..d4c9212 100644 --- a/nerv/layer/lstm.lua +++ b/nerv/layer/lstm.lua @@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf) -- input1:x -- input2:h -- input3:c - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - + nerv.Layer.__init(self, id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo() + pr = nerv.ParamRepo(nil, self.loc_type) end local function ap(str) @@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf) }, } - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + self.lrepo = nerv.LayerRepo(layers, pr, global_conf) local connections = { ["[1]"] = ap("inputXDup[1]"), @@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf) self.dag = nerv.DAGLayer(self.id, global_conf, {dim_in = self.dim_in, dim_out = self.dim_out, - sub_layers = layerRepo, + sub_layers = self.lrepo, connections = connections}) self:check_dim_len(3, 2) -- x, h, c and h, c end +function LSTMLayer:bind_params() + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo(nil, self.loc_type) + end + self.lrepo:rebind(pr) +end + function LSTMLayer:init(batch_size, chunk_size) self.dag:init(batch_size, chunk_size) end diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua index 1963eba..7a27bab 100644 --- a/nerv/layer/lstm_gate.lua +++ b/nerv/layer/lstm_gate.lua @@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer') -- NOTE: this is a full matrix gate function LSTMGateLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(-1, 1) --accept multiple inputs + self:bind_params() +end +function LSTMGateLayer:bind_params() for i = 1, #self.dim_in do - self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, + self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) end - self.bp = self:find_param("bp", layer_conf, global_conf, + self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam, {1, self.dim_out[1]}) - - self:check_dim_len(-1, 1) --accept multiple inputs end function LSTMGateLayer:init(batch_size) @@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output) end function LSTMGateLayer:get_params() - local pr = nerv.ParamRepo({self.bp}) + local pr = nerv.ParamRepo({self.bp}, self.loc_type) for i = 1, #self.dim_in do pr:add(self["ltp" .. i].id, self["ltp" .. i]) end diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 1c218d0..458d086 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -1,18 +1,14 @@ local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer") function MSELayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(2, -1) end +function MSELayer:bind_params() + -- do nothing +end + function MSELayer:init(batch_size) if self.dim_in[1] ~= self.dim_in[2] then nerv.error("mismatching dimensions of previous network output and labels") @@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output) end function MSELayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua index 0a8bcdc..a9f9749 100644 --- a/nerv/layer/sigmoid.lua +++ b/nerv/layer/sigmoid.lua @@ -1,13 +1,14 @@ local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer") function SigmoidLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) end +function SigmoidLayer:bind_params() + -- do nothing +end + function SigmoidLayer:init() if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output) end function SigmoidLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua index 4205b66..f7a5163 100644 --- a/nerv/layer/softmax.lua +++ b/nerv/layer/softmax.lua @@ -1,13 +1,14 @@ local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer") function SoftmaxLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) -- two inputs: nn output and label end +function SoftmaxLayer:bind_params() + -- do nothing +end + function SoftmaxLayer:init(batch_size) if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output) end function SoftmaxLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua index d7d650e..7b4a80c 100644 --- a/nerv/layer/softmax_ce.lua +++ b/nerv/layer/softmax_ce.lua @@ -1,15 +1,7 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") function SoftmaxCELayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self.compressed = layer_conf.compressed if self.compressed == nil then self.compressed = false @@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf) self:check_dim_len(2, -1) -- two inputs: nn output and label end +function SoftmaxCELayer:bind_params() + -- do nothing +end + function SoftmaxCELayer:init(batch_size, chunk_size) if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then nerv.error("mismatching dimensions of previous network output and labels") @@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t) end function SoftmaxCELayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua index e1c32f2..7a19fc8 100644 --- a/nerv/layer/tanh.lua +++ b/nerv/layer/tanh.lua @@ -1,13 +1,14 @@ local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer") function TanhLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) end +function TanhLayer:bind_params() + -- do nothing +end + function TanhLayer:init() if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output) end function TanhLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 4933de0..364929f 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -1,12 +1,15 @@ local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer") function WindowLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.window = layer_conf.window - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + self:bind_params() +end + +function WindowLayer:bind_params() + self.window = self:find_param("window", self.lconf, self.gconf, + nerv.BiasParam, + {1, self.dim_out[1]}) end function WindowLayer:init() @@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output) end function WindowLayer:get_params() - return nerv.ParamRepo({self.window}) + return nerv.ParamRepo({self.window}, self.loc_type) end diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua index ef2fb6b..cf85004 100644 --- a/nerv/matrix/init.lua +++ b/nerv/matrix/init.lua @@ -87,6 +87,17 @@ function nerv.Matrix:__mul__(b) return c end +--- A wrapper function for `copy_from` +function nerv.Matrix:copy_to(b, ...) + b:copy_from(self, ...) +end + +--- The base class for all device (in-GPU) matrices +-- @type nerv.CuMatrix + +--- A wrapper function for `copy_fromd` +nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to + --- CUDA float matrices -- @type nerv.CuMatrixFloat @@ -127,6 +138,14 @@ end -- @type nerv.MMatrix --- A wrapper function for `copy_fromh` -function nerv.MMatrix:copy_toh(b, ...) +nerv.MMatrix.copy_toh = nerv.Matrix.copy_to + +--- A wrapper function for `nerv.CuMatrix` copy +function nerv.MMatrix:copy_fromd(b, ...) + b:copy_toh(self, ...) +end + +--- A wrapper function for `nerv.CuMatrix` copy +function nerv.MMatrix:copy_tod(b, ...) b:copy_fromh(self, ...) end diff --git a/nerv/nerv b/nerv/nerv index f73d517..4c20ec7 100644 --- a/nerv/nerv +++ b/nerv/nerv @@ -3,6 +3,7 @@ require 'nerv' local options = {{"help", "h", "boolean", default = false, desc = "print this help message"}, {"use-cpu", "c", "boolean", default = false, desc = "use CPU by default (instead of gpu by default)"}, {"select-gpu", nil, "int", default = -1, desc = "select the GPU for computation, fallback to auto mode if not specified"}} +econf = {} -- environment configuration local function print_help() nerv.printf("Usage: [options] script.lua\n") @@ -31,6 +32,9 @@ if not opts["use-cpu"].val then _add_profile_method(nerv.CuMatrix) nerv.CuMatrix.select_gpu = function (dev) nerv.CuMatrix.