From 89d57b6fae6bcb0195a73fb97ab6870ee0d0ce20 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Wed, 30 Mar 2016 13:54:14 +0800
Subject: fix bug in passing err_input to network; gen zero vectors for bias

---
 nerv/examples/asr_trainer.lua | 17 ++++++++++-------
 nerv/layer/affine.lua         |  3 ++-
 nerv/layer/bias.lua           |  3 ++-
 nerv/layer/init.lua           | 12 +++++++++---
 nerv/layer/lstm_gate.lua      |  3 ++-
 nerv/layer/window.lua         |  3 ++-
 6 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 38ba6e9..9a764fc 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -39,12 +39,8 @@ local function build_trainer(ifname)
     local buffer = make_buffer(make_readers(scp_file, layer_repo))
     -- initialize the network
     gconf.cnt = 0
-    local err_input = {{}}
     local output = {{}}
     for i = 1, gconf.chunk_size do
-        local mini_batch = mat_type(gconf.batch_size, 1)
-        mini_batch:fill(1)
-        table.insert(err_input[1], mini_batch)
         table.insert(output[1], mat_type(gconf.batch_size, 1))
     end
     network:epoch_init()
@@ -91,7 +87,7 @@ local function build_trainer(ifname)
                              do_train = bp,
                              input = input,
                              output = output,
-                             err_input = err_input,
+                             err_input = {gconf.mask},
                              err_output = err_output})
            network:propagate()
            if bp then
@@ -254,8 +250,15 @@ nerv.set_logfile(path.join(working_dir, logfile_name))
 -- start the training
 local trainer = build_trainer(pf0)
 local pr_prev
-gconf.accu_best, pr_prev = trainer(nil, gconf.cv_scp, false)
+-- initial cross-validation
+local param_prefix = string.format("%s_%s",
+                        string.gsub(
+                            (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
+                            "(.*)%..*", "%1"),
+                        os.date(date_pattern))
+gconf.accu_best, pr_prev = trainer(path.join(working_dir, param_prefix), gconf.cv_scp, false)
 nerv.info("initial cross validation: %.3f", gconf.accu_best)
+-- main loop
 for i = gconf.cur_iter, gconf.max_iter do
     local stop = false
     gconf.cur_iter = i
@@ -264,7 +267,7 @@ for i = gconf.cur_iter, gconf.max_iter do
     nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
     local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo)
     nerv.info("[TR] training set %d: %.3f", i, accu_tr)
-    local param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f",
+    param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f",
                             string.gsub(
                                 (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
                                 "(.*)%..*", "%1"),
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index a05ae17..a1c92b1 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -90,7 +90,8 @@ function AffineLayer:bind_params()
     self.ltp = self.ltp1 -- alias of ltp1
     self.bp = self:find_param("bp", self.lconf, self.gconf,
                                 nerv.BiasParam,
-                                {1, self.dim_out[1]})
+                                {1, self.dim_out[1]},
+                                nerv.Param.gen_zero)
 end
 
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 191be78..03e310d 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -9,7 +9,8 @@ end
 
 function BiasLayer:bind_params()
     self.bias = self:find_param("bias", self.lconf, self.gconf,
                                   nerv.BiasParam,
-                                  {1, self.dim_out[1]})
+                                  {1, self.dim_out[1]},
+                                  nerv.Param.gen_zero)
 end
 
 function BiasLayer:init()
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index d266773..c5b7657 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -27,6 +27,10 @@ function Param:update(gradient)
     nerv.error_method_not_implemented()
 end
 
+function Param:gen_zero()
+    return 0
+end
+
 local Layer = nerv.class('nerv.Layer')
 
 function Layer:__init(id, global_conf, layer_conf)
@@ -93,7 +97,7 @@ function Layer:get_sublayer(id)
     nerv.error('primitive layer does not have sublayers')
 end
 
-function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim, p_gen)
     if type(plist) == "string" then
         plist = {plist}
     end
@@ -120,10 +124,12 @@ function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
                  "switch to auto-generate", plist_str, self.id)
     local p = p_type(pid, gconf)
     p.trans = self.mat_type(unpack(p_dim))
-    if type(gconf.param_random) ~= "function" then
+    p_gen = p_gen or gconf.param_gen
+                  or gconf.param_random -- obsolete name
+    if type(p_gen) ~= "function" then
         nerv.error("a param generate function is needed")
     end
-    p.trans:generate(gconf.param_random)
+    p.trans:generate(p_gen)
     return p
 end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 9d79b04..e3b11b4 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -18,7 +18,8 @@ function LSTMGateLayer:bind_params()
         end
     end
     self.bp = self:find_param("bp", self.lconf, self.gconf,
-                              nerv.BiasParam, {1, self.dim_out[1]})
+                              nerv.BiasParam, {1, self.dim_out[1]},
+                              nerv.Param.gen_zero)
 end
 
 function LSTMGateLayer:init(batch_size)
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 364929f..729ab58 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -9,7 +9,8 @@ end
 
 function WindowLayer:bind_params()
     self.window = self:find_param("window", self.lconf, self.gconf,
                                     nerv.BiasParam,
-                                    {1, self.dim_out[1]})
+                                    {1, self.dim_out[1]},
+                                    nerv.Param.gen_zero)
 end
 
 function WindowLayer:init()
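
A note on the find_param change, as a standalone sketch rather than nerv code: FakeMatrix and pick_generator below are hypothetical stand-ins, and the sketch assumes that trans:generate(f) fills each element with the value returned by f (the patch only shows that a generator function is passed in). It illustrates the fallback order now used in Layer:find_param (an explicit p_gen, then gconf.param_gen, then the obsolete gconf.param_random) and the new nerv.Param.gen_zero generator that the bias-holding layers pass so that an auto-generated bias starts out as a zero vector.

-- Standalone illustration only; FakeMatrix is a stand-in for nerv's matrix type.
local FakeMatrix = {}
FakeMatrix.__index = FakeMatrix

function FakeMatrix.new(nrow, ncol)
    return setmetatable({nrow = nrow, ncol = ncol, data = {}}, FakeMatrix)
end

-- assumed contract: call the generator once per element and store the result
function FakeMatrix:generate(f)
    for i = 1, self.nrow * self.ncol do
        self.data[i] = f()
    end
end

-- mirrors nerv.Param.gen_zero from the patch
local function gen_zero()
    return 0
end

-- mirrors the new fallback chain in Layer:find_param
local function pick_generator(p_gen, gconf)
    return p_gen or gconf.param_gen or gconf.param_random
end

local gconf = {
    param_gen = function() return math.random() * 0.1 - 0.05 end
}

-- a bias-like param auto-generated with gen_zero comes out as a zero vector
local bias = FakeMatrix.new(1, 4)
bias:generate(pick_generator(gen_zero, gconf))
print(table.concat(bias.data, " ")) --> 0 0 0 0

-- a weight-like param with no explicit generator falls back to gconf.param_gen
local ltp = FakeMatrix.new(2, 3)
ltp:generate(pick_generator(nil, gconf))

Passing the generator per find_param call keeps old configurations that only define gconf.param_random working, while letting individual layers force zero initialization for their bias parameters.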