| author | txh18 <[email protected]> | 2015-12-23 16:11:42 +0800 |
|---|---|---|
| committer | txh18 <[email protected]> | 2015-12-23 16:11:42 +0800 |
| commit | fa86b8527d2705f3b2a510434281d052e27d01b8 (patch) | |
| tree | 448c9e89d76682246e045a6bb1661c23c1370609 | |
| parent | c1f31af8ad1513363eb0b3b8626160cfccb45882 (diff) | |
changed the affine_recurrent layer used in the LM to affine_recurrent_plusvec
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | nerv/examples/lmptb/lmptb/layer/affine_recurrent.lua | 93 |
| -rw-r--r-- | nerv/examples/lmptb/lmptb/layer/affine_recurrent_plusvec.lua | 74 |
| -rw-r--r-- | nerv/examples/lmptb/lmptb/layer/init.lua | 2 |
| -rw-r--r-- | nerv/examples/lmptb/lmptb/layer/select_linear.lua | 2 |
| -rw-r--r-- | nerv/examples/lmptb/rnnlm_ptb_main.lua | 176 |
| -rw-r--r-- | nerv/layer/affine_recurrent.lua | 52 |
6 files changed, 197 insertions, 202 deletions
```diff
diff --git a/nerv/examples/lmptb/lmptb/layer/affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/affine_recurrent.lua
deleted file mode 100644
index 0a762f0..0000000
--- a/nerv/examples/lmptb/lmptb/layer/affine_recurrent.lua
+++ /dev/null
@@ -1,93 +0,0 @@
-local Recurrent = nerv.class('nerv.AffineRecurrentLayer', 'nerv.Layer')
-
---id: string
---global_conf: table
---layer_conf: table
---Get Parameters
-function Recurrent:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
-    self.bp = layer_conf.bp
-    self.ltp_ih = layer_conf.ltp_ih --from input to hidden
-    self.ltp_hh = layer_conf.ltp_hh --from hidden to hidden
-
-    self:check_dim_len(2, 1)
-    self.direct_update = layer_conf.direct_update
-end
-
---Check parameter
-function Recurrent:init(batch_size)
-    if (self.ltp_ih.trans:ncol() ~= self.bp.trans:ncol() or
-        self.ltp_hh.trans:ncol() ~= self.bp.trans:ncol()) then
-        nerv.error("mismatching dimensions of ltp and bp")
-    end
-    if (self.dim_in[1] ~= self.ltp_ih.trans:nrow() or
-        self.dim_in[2] ~= self.ltp_hh.trans:nrow()) then
-        nerv.error("mismatching dimensions of ltp and input")
-    end
-    if (self.dim_out[1] ~= self.bp.trans:ncol()) then
-        nerv.error("mismatching dimensions of bp and output")
-    end
-
-    self.ltp_ih_grad = self.ltp_ih.trans:create()
-    self.ltp_hh_grad = self.ltp_hh.trans:create()
-    self.ltp_ih:train_init()
-    self.ltp_hh:train_init()
-    self.bp:train_init()
-end
-
-function Recurrent:update(bp_err, input, output)
-    if (self.direct_update == true) then
-        local ltp_ih = self.ltp_ih.trans
-        local ltp_hh = self.ltp_hh.trans
-        local bp = self.bp.trans
-        local ltc_ih = self.ltc_ih
-        local ltc_hh = self.ltc_hh
-        local bc = self.bc
-        local gconf = self.gconf
-        -- momentum gain
-        local mmt_gain = 1.0 / (1.0 - gconf.momentum);
-        local n = input[1]:nrow() * mmt_gain
-        -- update corrections (accumulated errors)
-        self.ltp_ih.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
-        self.ltc_hh.correction:mul(input[2], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
-        self.bp.correction:add(bc, bp_err[1]:colsum(), gconf.momentum, 1.0)
-        -- perform update
-        ltp_ih:add(ltp_ih, self.ltp_ih.correction, 1.0, -gconf.lrate / n)
-        ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0, -gconf.lrate / n)
-        bp:add(bp, self.bp.correction, 1.0, -gconf.lrate / n)
-        -- weight decay
-        ltp_ih:add(ltp_ih, ltp_ih, 1.0, -gconf.lrate * gconf.wcost)
-        ltp_hh:add(ltp_hh, ltp_hh, 1.0, -gconf.lrate * gconf.wcost)
-    else
-        self.ltp_ih_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
-        self.ltp_ih:update(self.ltp_ih_grad)
-        self.ltp_hh_grad:mul(input[2], bp_err[1], 1.0, 0.0, 'T', 'N')
-        self.ltp_hh:update(self.ltp_hh_grad)
-        self.bp:update(bp_err[1]:colsum())
-    end
-end
-
-function Recurrent:propagate(input, output)
-    output[1]:mul(input[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'N')
-    output[1]:mul(input[2], self.ltp_hh.trans, 1.0, 1.0, 'N', 'N')
-    output[1]:add_row(self.bp.trans, 1.0)
-end
-
-function Recurrent:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul(bp_err[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'T')
-    next_bp_err[2]:mul(bp_err[1], self.ltp_hh.trans, 1.0, 0.0, 'N', 'T')
-    for i = 0, next_bp_err[2]:nrow() - 1 do
-        for j = 0, next_bp_err[2]:ncol() - 1 do
-            if (next_bp_err[2][i][j] > 10) then next_bp_err[2][i][j] = 10 end
-            if (next_bp_err[2][i][j] < -10) then next_bp_err[2][i][j] = -10 end
-        end
-    end
-end
-
-function Recurrent:get_params()
-    return {self.ltp_ih, self.ltp_hh, self.bp}
-end
diff --git a/nerv/examples/lmptb/lmptb/layer/affine_recurrent_plusvec.lua b/nerv/examples/lmptb/lmptb/layer/affine_recurrent_plusvec.lua
new file mode 100644
index 0000000..5606a09
--- /dev/null
+++ b/nerv/examples/lmptb/lmptb/layer/affine_recurrent_plusvec.lua
@@ -0,0 +1,74 @@
+local RecurrentV = nerv.class('nerv.AffineRecurrentPlusVecLayer', 'nerv.Layer')
+
+--id: string
+--global_conf: table
+--layer_conf: table
+--Get Parameters
+function RecurrentV:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    self.bp = self:find_param("bp", layer_conf, global_conf, nerv.BiasParam, {1, self.dim_out[1]}) --layer_conf.bp
+    self.ltp_hh = self:find_param("ltp_hh", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[2], self.dim_out[1]}) --layer_conf.ltp_hh --from hidden to hidden
+
+    self:check_dim_len(2, 1)
+    self.direct_update = layer_conf.direct_update
+
+    self.clip = layer_conf.clip --clip error in back_propagate
+end
+
+--Check parameter
+function RecurrentV:init(batch_size)
+    if (self.ltp_hh.trans:ncol() ~= self.bp.trans:ncol()) then
+        nerv.error("mismatching dimensions of ltp and bp")
+    end
+    if (self.dim_in[1] ~= self.ltp_hh.trans:nrow() or
+        self.dim_in[2] ~= self.ltp_hh.trans:nrow()) then
+        nerv.error("mismatching dimensions of ltp and input")
+    end
+    if (self.dim_out[1] ~= self.bp.trans:ncol()) then
+        nerv.error("mismatching dimensions of bp and output")
+    end
+
+    self.ltp_hh_grad = self.ltp_hh.trans:create()
+    self.ltp_hh:train_init()
+    self.bp:train_init()
+end
+
+function RecurrentV:batch_resize(batch_size)
+    -- do nothing
+end
+
+function RecurrentV:update(bp_err, input, output)
+    --self.ltp_hh_grad:mul(input[2], bp_err[1], 1.0, 0.0, 'T', 'N')
+    self.ltp_hh:update_by_err_input(bp_err[1], input[2])
+    self.bp:update_by_gradient(bp_err[1]:colsum())
+end
+
+function RecurrentV:propagate(input, output)
+    output[1]:copy_fromd(input[1])
+    output[1]:mul(input[2], self.ltp_hh.trans, 1.0, 1.0, 'N', 'N')
+    output[1]:add_row(self.bp.trans, 1.0)
+end
+
+function RecurrentV:back_propagate(bp_err, next_bp_err, input, output)
+    next_bp_err[1]:copy_fromd(bp_err[1])
+    next_bp_err[2]:mul(bp_err[1], self.ltp_hh.trans, 1.0, 0.0, 'N', 'T')
+    --[[
+    for i = 0, next_bp_err[2]:nrow() - 1 do
+        for j = 0, next_bp_err[2]:ncol() - 1 do
+            if (next_bp_err[2][i][j] > 10) then next_bp_err[2][i][j] = 10 end
+            if (next_bp_err[2][i][j] < -10) then next_bp_err[2][i][j] = -10 end
+        end
+    end
+    ]]--
+    if (self.clip ~= nil) then
+        next_bp_err[2]:clip(- self.clip, self.clip)
+    end
+end
+
+function RecurrentV:get_params()
+    return nerv.ParamRepo({self.ltp_hh, self.bp})
+end
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index ff29126..ae2887c 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,5 +1,5 @@
 require 'lmptb.layer.select_linear'
---require 'lmptb.layer.affine_recurrent'
+require 'lmptb.layer.affine_recurrent_plusvec'
 require 'lmptb.layer.lm_affine_recurrent'
diff --git a/nerv/examples/lmptb/lmptb/layer/select_linear.lua b/nerv/examples/lmptb/lmptb/layer/select_linear.lua
index 3eba31e..f07eb2f 100644
--- a/nerv/examples/lmptb/lmptb/layer/select_linear.lua
+++ b/nerv/examples/lmptb/lmptb/layer/select_linear.lua
@@ -38,7 +38,7 @@ function SL:update(bp_err, input, output)
     --I tried the update_select_rows kernel which uses atomicAdd, but it generates unreproducable result
     self.ltp.trans:update_select_rows_by_colidx(bp_err[1], input[1], - self.gconf.lrate / self.gconf.batch_size, 0)
-    self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost / self.gconf.batch_size)
+    self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost)
 end
 
 function SL:propagate(input, output)
diff --git a/nerv/examples/lmptb/rnnlm_ptb_main.lua b/nerv/examples/lmptb/rnnlm_ptb_main.lua
index ca62023..e2ca860 100644
--- a/nerv/examples/lmptb/rnnlm_ptb_main.lua
+++ b/nerv/examples/lmptb/rnnlm_ptb_main.lua
@@ -77,7 +77,7 @@ function prepare_layers(global_conf)
     local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
 
     local layers = {
-        ["nerv.AffineRecurrentLayer"] = {
+        ["nerv.AffineRecurrentPlusVecLayer"] = {
             ["recurrentL1"] = recurrentLconfig,
         },
 
@@ -163,9 +163,11 @@ local train_fn, valid_fn, test_fn
 global_conf = {}
 local set = arg[1] --"test"
 
+root_dir = '/home/slhome/txh18/workspace'
+
 if (set == "ptb") then
 
-data_dir = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata'
+data_dir = root_dir .. '/nerv/nerv/nerv/examples/lmptb/PTBdata'
 train_fn = data_dir .. '/ptb.train.txt.adds'
 valid_fn = data_dir .. '/ptb.valid.txt.adds'
 test_fn = data_dir .. '/ptb.test.txt.adds'
@@ -177,10 +179,10 @@ global_conf = {
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,
 
-    hidden_size = 400, --set to 400 for a stable good test PPL
+    hidden_size = 300, --set to 400 for a stable good test PPL
     chunk_size = 15,
     batch_size = 10,
-    max_iter = 35,
+    max_iter = 30,
     decay_iter = 15,
     param_random = function() return (math.random() / 5 - 0.1) end,
 
@@ -191,7 +193,7 @@ global_conf = {
     sche_log_pre = "[SCHEDULER]:",
     log_w_num = 40000, --give a message when log_w_num words have been processed
     timer = nerv.Timer(),
-    work_dir_base = '/home/slhome/txh18/workspace/nerv/play/ptbEXP/tnn_test'
+    work_dir_base = root_dir .. '/ptb/EXP-nerv/rnnlm_tnn'
 }
 
 elseif (set == "msr_sc") then
@@ -259,6 +261,8 @@ end
 lr_half = false --can not be local, to be set by loadstring
 start_iter = -1
 ppl_last = 100000
+test_iter = -1
+commands_str = "train:test"
 if (arg[2] ~= nil) then
     printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
     loadstring(arg[2])()
@@ -271,6 +275,9 @@ global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_si
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
+global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str .. os.date("_TT%m_%d_%X",os.time())
+global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-')
+commands = nerv.SUtil.parse_commands_set(commands_str)
 
 ----------------printing options---------------------------------
 printf("%s printing global_conf...\n", global_conf.sche_log_pre)
@@ -281,92 +288,113 @@ nerv.LMUtil.wait(2)
 printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
 print("lr_half", lr_half)
 print("start_iter", start_iter)
+print("test_iter", test_iter)
 print("ppl_last", ppl_last)
 printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
 nerv.LMUtil.wait(2)
 ------------------printing options end------------------------------
-math.randomseed(1)
-
 printf("%s creating work_dir...\n", global_conf.sche_log_pre)
-os.execute("mkdir -p "..global_conf.work_dir)
+os.execute("mkdir -p ".. global_conf.work_dir)
 os.execute("cp " .. global_conf.train_fn .. " " .. global_conf.train_fn_shuf)
+--redirecting log outputs!
+nerv.SUtil.log_redirect(global_conf.log_fn)
+nerv.LMUtil.wait(2)
+
+math.randomseed(1)
+
 local vocab = nerv.LMVocab()
 global_conf["vocab"] = vocab
 printf("%s building vocab...\n", global_conf.sche_log_pre)
 global_conf.vocab:build_file(global_conf.vocab_fn, false)
 ppl_rec = {}
-if start_iter == -1 then
-    prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
-end
+local final_iter = -1
+if commands["train"] == 1 then
+    if start_iter == -1 then
+        prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
+    end
+
+    if start_iter == -1 or start_iter == 0 then
+        print("===INITIAL VALIDATION===")
+        local tnn = load_net(global_conf, 0)
+        global_conf.paramRepo = tnn:get_params() --get auto-generted params
+        global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
+        local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+        nerv.LMUtil.wait(1)
+        ppl_rec[0] = {}
+        ppl_rec[0].valid = result:ppl_all("rnn")
+        ppl_last = ppl_rec[0].valid
+        ppl_rec[0].train = 0
+        ppl_rec[0].test = 0
+        ppl_rec[0].lr = 0
+
+        start_iter = 1
+
+        print()
+    end
+
+    for iter = start_iter, global_conf.max_iter, 1 do
+        final_iter = iter --for final testing
+        global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
+        tnn = load_net(global_conf, iter - 1)
+        printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+        ppl_rec[iter] = {}
+        ppl_rec[iter].train = result:ppl_all("rnn")
+        --shuffling training file
+        printf("%s shuffling training file\n", global_conf.sche_log_pre)
+        os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
+        os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+        printf("===PEEK ON TEST %d===\n", iter)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+        ppl_rec[iter].test = result:ppl_all("rnn")
+        printf("===VALIDATION %d===\n", iter)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+        ppl_rec[iter].valid = result:ppl_all("rnn")
+        ppl_rec[iter].lr = global_conf.lrate
+        if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then
+            global_conf.lrate = (global_conf.lrate * 0.6)
+        end
+        if ppl_rec[iter].valid < ppl_last then
+            printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
+            global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
+        else
+            printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
+            os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
+        end
+        if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then
+            lr_half = true
+        end
+        if ppl_rec[iter].valid < ppl_last then
+            ppl_last = ppl_rec[iter].valid
+        end
+        printf("\n")
+        nerv.LMUtil.wait(2)
+    end
-if start_iter == -1 or start_iter == 0 then
-    print("===INITIAL VALIDATION===")
-    local tnn = load_net(global_conf, 0)
-    global_conf.paramRepo = tnn:get_params() --get auto-generted params
-    global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
-    local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    nerv.LMUtil.wait(1)
-    ppl_rec[0] = {}
-    ppl_rec[0].valid = result:ppl_all("rnn")
-    ppl_last = ppl_rec[0].valid
-    ppl_rec[0].train = 0
-    ppl_rec[0].test = 0
-    ppl_rec[0].lr = 0
-
-    start_iter = 1
-
-    print()
-end
+    nerv.info("saving final nn to param.final")
+    os.execute('cp ' .. global_conf.param_fn .. '.' .. tostring(final_iter) .. ' ' .. global_conf.param_fn .. '.final')
-local final_iter
-for iter = start_iter, global_conf.max_iter, 1 do
-    final_iter = iter --for final testing
-    global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
-    tnn = load_net(global_conf, iter - 1)
-    printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
-    ppl_rec[iter] = {}
-    ppl_rec[iter].train = result:ppl_all("rnn")
-    --shuffling training file
-    printf("%s shuffling training file\n", global_conf.sche_log_pre)
-    os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
-    os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
-    printf("===PEEK ON TEST %d===\n", iter)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-    ppl_rec[iter].test = result:ppl_all("rnn")
-    printf("===VALIDATION %d===\n", iter)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    ppl_rec[iter].valid = result:ppl_all("rnn")
-    ppl_rec[iter].lr = global_conf.lrate
-    if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then
-        global_conf.lrate = (global_conf.lrate * 0.6)
+    printf("===VALIDATION PPL record===\n")
+    for i, _ in pairs(ppl_rec) do
+        printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
     end
-    if ppl_rec[iter].valid < ppl_last then
-        printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
-        global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
-    else
-        printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
-        os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
-    end
-    if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then
-        lr_half = true
+    printf("\n")
+end --if commands["train"]
+
+if commands["test"] == 1 then
+    if final_iter ~= -1 and test_iter == -1 then
+        test_iter = final_iter
     end
-    if ppl_rec[iter].valid < ppl_last then
-        ppl_last = ppl_rec[iter].valid
+    if test_iter == -1 then
+        test_iter = "final"
     end
-    printf("\n")
-    nerv.LMUtil.wait(2)
-end
-printf("===VALIDATION PPL record===\n")
-for i, _ in pairs(ppl_rec) do
-    printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
-end
-printf("\n")
-printf("===FINAL TEST===\n")
-global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
-tnn = load_net(global_conf, final_iter)
-LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+
+    printf("===FINAL TEST===\n")
+    global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
+    tnn = load_net(global_conf, test_iter)
+    LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+end --if commands["test"]
diff --git a/nerv/layer/affine_recurrent.lua b/nerv/layer/affine_recurrent.lua
index d537f4a..fd6f38f 100644
--- a/nerv/layer/affine_recurrent.lua
+++ b/nerv/layer/affine_recurrent.lua
@@ -9,31 +9,37 @@ function Recurrent:__init(id, global_conf, layer_conf)
     self.dim_in = layer_conf.dim_in
     self.dim_out = layer_conf.dim_out
     self.gconf = global_conf
+    self.log_pre = self.id .. "[LOG]"
 
     self.bp = self:find_param("bp", layer_conf, global_conf, nerv.BiasParam, {1, self.dim_out[1]}) --layer_conf.bp
-    self.ltp_hh = self:find_param("ltp_hh", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[2], self.dim_out[1]}) --layer_conf.ltp_hh --from hidden to hidden
+    self.ltp_hh = self:find_param("ltphh", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[2], self.dim_out[1]}) --layer_conf.ltp_hh --from hidden to hidden
+    self.ltp_ih = self:find_param("ltpih", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[1], self.dim_out[1]}) --layer_conf.ltp_hh --from hidden to hidden
 
     self:check_dim_len(2, 1)
     self.direct_update = layer_conf.direct_update
 
     self.clip = layer_conf.clip --clip error in back_propagate
+    if self.clip ~= nil then
+        nerv.info("%s creating, will clip the error by %f", self.log_pre, self.clip)
+    end
 end
 
 --Check parameter
 function Recurrent:init(batch_size)
-    if (self.ltp_hh.trans:ncol() ~= self.bp.trans:ncol()) then
+    if self.ltp_hh.trans:ncol() ~= self.bp.trans:ncol() or
+        self.ltp_ih.trans:ncol() ~= self.bp.trans:ncol() then
        nerv.error("mismatching dimensions of ltp and bp")
     end
-    if (self.dim_in[1] ~= self.ltp_hh.trans:nrow() or
-        self.dim_in[2] ~= self.ltp_hh.trans:nrow()) then
+    if self.dim_in[1] ~= self.ltp_ih.trans:nrow() or
+        self.dim_in[2] ~= self.ltp_hh.trans:nrow() then
         nerv.error("mismatching dimensions of ltp and input")
     end
     if (self.dim_out[1] ~= self.bp.trans:ncol()) then
         nerv.error("mismatching dimensions of bp and output")
     end
 
-    self.ltp_hh_grad = self.ltp_hh.trans:create()
     self.ltp_hh:train_init()
+    self.ltp_ih:train_init()
     self.bp:train_init()
 end
@@ -42,39 +48,19 @@ function Recurrent:batch_resize(batch_size)
 end
 
 function Recurrent:update(bp_err, input, output)
-    if self.direct_update == true then
-        local ltp_hh = self.ltp_hh.trans
-        local bp = self.bp.trans
-        local gconf = self.gconf
-        if (gconf.momentum > 0) then
-            -- momentum gain
-            local mmt_gain = 1.0 / (1.0 - gconf.momentum)
-            local n = input[1]:nrow() * mmt_gain
-            -- update corrections (accumulated errors)
-            self.ltp_hh.correction:mul(input[2], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
-            self.bp.correction:add(self.bp.correction, bp_err[1]:colsum(), gconf.momentum, 1.0)
-            -- perform update and weight decay
-            ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
-            bp:add(bp, self.bp.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
-        else
-            ltp_hh:mul(input[2], bp_err[1], - gconf.lrate / gconf.batch_size, 1.0 - gconf.wcost * gconf.lrate / gconf.batch_size, 'T', 'N')
-            bp:add(bp, bp_err[1]:colsum(), 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / gconf.batch_size)
-        end
-    else
-        --self.ltp_hh_grad:mul(input[2], bp_err[1], 1.0, 0.0, 'T', 'N')
-        self.ltp_hh:update_by_err_input(bp_err[1], input[2])
-        self.bp:update_by_gradient(bp_err[1]:colsum())
-    end
+    self.ltp_ih:update_by_err_input(bp_err[1], input[1])
+    self.ltp_hh:update_by_err_input(bp_err[1], input[2])
+    self.bp:update_by_gradient(bp_err[1]:colsum())
 end
 
 function Recurrent:propagate(input, output)
-    output[1]:copy_fromd(input[1])
+    output[1]:mul(input[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'N')
     output[1]:mul(input[2], self.ltp_hh.trans, 1.0, 1.0, 'N', 'N')
     output[1]:add_row(self.bp.trans, 1.0)
 end
 
 function Recurrent:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:copy_fromd(bp_err[1])
+    next_bp_err[1]:mul(bp_err[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'T')
     next_bp_err[2]:mul(bp_err[1], self.ltp_hh.trans, 1.0, 0.0, 'N', 'T')
     --[[
     for i = 0, next_bp_err[2]:nrow() - 1 do
@@ -84,11 +70,11 @@ function Recurrent:back_propagate(bp_err, next_bp_err, input, output)
         end
     end
     ]]--
-    if (self.clip ~= nil) then
-        next_bp_err[2]:clip(- self.clip, self.clip)
+    if self.clip ~= nil then
+        next_bp_err[2]:clip(-self.clip, self.clip)
     end
 end
 
 function Recurrent:get_params()
-    return nerv.ParamRepo({self.ltp_hh, self.bp})
+    return nerv.ParamRepo({self.ltp_ih, self.ltp_hh, self.bp})
 end
```
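For readers skimming the diff, the sketch below restates what the two forward passes compute, using plain Lua tables instead of nerv's CUDA matrices. The helper names (`vec_mat`, `vec_add`) and the toy data layout are invented for illustration and are not part of nerv. The two functions mirror `Recurrent:propagate` in `nerv/layer/affine_recurrent.lua` (after this commit) and `RecurrentV:propagate` in the new `affine_recurrent_plusvec.lua`: the stock layer applies both an input-to-hidden and a hidden-to-hidden transform, while the plus-vec variant assumes its first input is already a projected vector of `hidden_size` dimensions (its `dim_in` is `{hidden_size, hidden_size}` in the PTB recipe) and simply adds it.

```lua
-- Toy dense helpers over plain Lua tables (row vector v, matrix M as a table of rows).
-- They stand in for the CuMatrix mul/add_row calls used in the real layers.
local function vec_mat(v, M)
    local out = {}
    for j = 1, #M[1] do
        local s = 0
        for i = 1, #v do s = s + v[i] * M[i][j] end
        out[j] = s
    end
    return out
end

local function vec_add(a, b)
    local out = {}
    for i = 1, #a do out[i] = a[i] + b[i] end
    return out
end

-- nerv.AffineRecurrentLayer (core library layer after this commit):
--   h_t = x_t * W_ih + h_{t-1} * W_hh + b
local function affine_recurrent(x, h_prev, W_ih, W_hh, b)
    return vec_add(vec_add(vec_mat(x, W_ih), vec_mat(h_prev, W_hh)), b)
end

-- nerv.AffineRecurrentPlusVecLayer (the new lmptb layer):
--   the first input is added directly, with no input-to-hidden matrix:
--   h_t = x_proj + h_{t-1} * W_hh + b
local function affine_recurrent_plusvec(x_proj, h_prev, W_hh, b)
    return vec_add(vec_add(x_proj, vec_mat(h_prev, W_hh)), b)
end

-- Example: a 2-dimensional hidden state with identity W_hh and zero bias.
local h = affine_recurrent_plusvec({1, 2}, {3, 4}, {{1, 0}, {0, 1}}, {0, 0})
print(h[1], h[2]) -- 4   6
```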