| field     | value | date |
|-----------|-------|------|
| author    | txh18 <[email protected]> | 2015-11-24 15:37:02 +0800 |
| committer | txh18 <[email protected]> | 2015-11-24 15:37:02 +0800 |
| commit    | 914a026734db6608e04987e9fcec9c82612e8673 (patch) | |
| tree      | 89e4db2987b6c757667aef96cc41974fda302927 | |
| parent    | f829b2b49d1db7fb6a49109722b9c7a41ae9324a (diff) | |
added wcost for biasparam in lm_trainer
| mode       | file | lines changed |
|------------|------|---------------|
| -rw-r--r-- | nerv/examples/lmptb/lm_trainer.lua | 9 |
| -rw-r--r-- | nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua | 2 |
| -rw-r--r-- | nerv/examples/lmptb/lmptb/lmutil.lua | 6 |
| -rw-r--r-- | nerv/examples/lmptb/tnn/layer_dag_t.lua | 13 |
| -rw-r--r-- | nerv/examples/lmptb/tnn_ptb_main.lua | 6 |
| -rw-r--r-- | nerv/examples/lmptb/unfold_ptb_main.lua | 10 |

6 files changed, 32 insertions, 14 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
index 62d8b50..185bc6d 100644
--- a/nerv/examples/lmptb/lm_trainer.lua
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -2,13 +2,20 @@ require 'lmptb.lmvocab'
 require 'lmptb.lmfeeder'
 require 'lmptb.lmutil'
 require 'lmptb.layer.init'
-require 'rnn.init'
+require 'tnn.init'
 require 'lmptb.lmseqreader'
 
 local LMTrainer = nerv.class('nerv.LMTrainer')
 
 local printf = nerv.printf
 
+--The bias param update in nerv don't have wcost added
+function nerv.BiasParam:update_by_gradient(gradient)
+    local gconf = self.gconf
+    local l2 = 1 - gconf.lrate * gconf.wcost
+    self:_update_by_gradient(gradient, l2, l2)
+end
+
 --Returns: LMResult
 function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
     local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
diff --git a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
index a5ecce1..c43e567 100644
--- a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
+++ b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
@@ -14,7 +14,7 @@ function LMRecurrent:propagate(input, output)
     output[1]:copy_fromd(input[1])
     if (self.independent == true) then
         for i = 1, input[1]:nrow() do
-            if (self.gconf.input_word_id[self.id][0][i - 1] == self.break_id) then --here is sentence break
+            if (self.gconf.input_word_id[self.id][i - 1][0] == self.break_id) then --here is sentence break
                 input[2][i - 1]:fill(0)
             end
         end
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 821aa94..71e8e17 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -48,15 +48,15 @@ end
 --Returns: nerv.MMatrixInt
 --Set the matrix to be ids of the words, id starting at 1, not 0
 function Util.set_id(m, list, vocab)
-    if (m:ncol() ~= #list or m:nrow() ~= 1) then
+    if (m:nrow() ~= #list or m:ncol() ~= 1) then
         nerv.error("nrow of matrix mismatch with list or its col not one")
     end
 
     for i = 1, #list, 1 do --index in matrix starts at 0
         if (list[i] ~= vocab.null_token) then
-            m[0][i - 1] = vocab:get_word_str(list[i]).id
+            m[i - 1][0] = vocab:get_word_str(list[i]).id
         else
-            m[0][i - 1] = 0
+            m[i - 1][0] = 0
         end
     end
     return m
diff --git a/nerv/examples/lmptb/tnn/layer_dag_t.lua b/nerv/examples/lmptb/tnn/layer_dag_t.lua
index cd5fba7..ade65cc 100644
--- a/nerv/examples/lmptb/tnn/layer_dag_t.lua
+++ b/nerv/examples/lmptb/tnn/layer_dag_t.lua
@@ -266,7 +266,9 @@ function DAGLayerT:set_inputs(input, t)
         end
         local layer = self.inputs[i][1]
         local port = self.inputs[i][2]
-
+        if layer.inputs[t] == nil then
+            layer.inputs[t] = {}
+        end
         layer.inputs[t][port] = input[i]
     end
 end
@@ -278,6 +280,9 @@ function DAGLayerT:set_outputs(output, t)
         end
         local layer = self.outputs[i][1]
         local port = self.outputs[i][2]
+        if layer.outputs[t] == nil then
+            layer.outputs[t] = {}
+        end
         layer.outputs[t][port] = output[i]
     end
 end
@@ -286,6 +291,9 @@ function DAGLayerT:set_err_inputs(bp_err, t)
     for i = 1, #self.dim_out do
         local layer = self.outputs[i][1]
         local port = self.outputs[i][2]
+        if layer.err_inputs[t] == nil then
+            layer.err_inputs[t] = {}
+        end
         layer.err_inputs[t][port] = bp_err[i]
     end
 end
@@ -294,6 +302,9 @@ function DAGLayerT:set_err_outputs(next_bp_err, t)
     for i = 1, #self.dim_in do
         local layer = self.inputs[i][1]
         local port = self.inputs[i][2]
+        if layer.err_outputs[t] == nil then
+            layer.err_outputs[t] = {}
+        end
         layer.err_outputs[t][port] = next_bp_err[i]
     end
 end
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index 66c7317..9156b61 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -172,7 +172,7 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
 vocab_fn = data_dir .. '/vocab'
 
 global_conf = {
-    lrate = 1, wcost = 1e-5, momentum = 0,
+    lrate = 1, wcost = 1e-6, momentum = 0,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,
@@ -181,7 +181,7 @@ global_conf = {
     chunk_size = 15,
     batch_size = 10,
     max_iter = 35,
-    decay_iter = 16,
+    decay_iter = 15,
     param_random = function() return (math.random() / 5 - 0.1) end,
 
     train_fn = train_fn,
@@ -267,7 +267,7 @@ else
     printf("%s not user setting, all default...\n", global_conf.sche_log_pre)
 end
 
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/examples/lmptb/unfold_ptb_main.lua b/nerv/examples/lmptb/unfold_ptb_main.lua
index 5affadf..eebab2b 100644
--- a/nerv/examples/lmptb/unfold_ptb_main.lua
+++ b/nerv/examples/lmptb/unfold_ptb_main.lua
@@ -174,7 +174,7 @@ function propagateFile(global_conf, dagL, fn, config)
 
     local dagL_input = {}
     for i = 1, global_conf.bptt + 1 do
-        dagL_input[i] = global_conf.cumat_type(1, global_conf.batch_size) --changed to row vector, debughtx
+        dagL_input[i] = global_conf.cumat_type(global_conf.batch_size, 1) --changed to row vector, debughtx
     end
     dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
     dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
@@ -210,7 +210,7 @@ function propagateFile(global_conf, dagL, fn, config)
         global_conf.timer:toc("dagL-propagate")
 
         hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
-        hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
+        hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1][1])
 
         if (config.do_train == true) then
             global_conf.timer:tic("dagL-back_propagate")
@@ -280,7 +280,7 @@ if (set == "ptb") then
 
         cumat_type = nerv.CuMatrixFloat,
         mmat_type = nerv.MMatrixFloat,
-        hidden_size = 200,
+        hidden_size = 50,
         batch_size = 10,
         bptt = 6, --train bptt_block's words. could be set to zero
         max_iter = 18,
@@ -291,7 +291,7 @@ if (set == "ptb") then
         valid_fn = valid_fn,
         test_fn = test_fn,
         sche_log_pre = "[SCHEDULER]:",
-        log_w_num = 100000, --give a message when log_w_num words have been processed
+        log_w_num = 1000, --give a message when log_w_num words have been processed
         timer = nerv.Timer()
     }
     global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate --..os.date("_%bD%dH%H") --comment this for testing
@@ -304,7 +304,7 @@ elseif (set == "test") then
     global_conf = {
         lrate = 0.1, wcost = 1e-6, momentum = 0,
         cumat_type = nerv.CuMatrixFloat,
-        mmat_type = nerv.CuMatrixFloat,
+        mmat_type = nerv.MMatrixFloat,
 
         hidden_size = 5,
         batch_size = 1,
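
The central change, per the commit message, is the `nerv.BiasParam:update_by_gradient` override in lm_trainer.lua: bias parameters now receive the same L2 weight decay (`wcost`) as the other parameters, via the decay factor `l2 = 1 - lrate * wcost`. The sketch below is illustrative only and assumes a plain-SGD update `b <- l2 * b - lrate * g`; the actual scaling in nerv happens inside `Param:_update_by_gradient`, whose exact semantics are not shown in this diff.

```lua
-- Minimal standalone sketch of the decay factor used above (assumptions:
-- gconf fields, a plain Lua table standing in for the bias matrix, and a
-- vanilla SGD-with-weight-decay rule; not nerv's real matrix kernel).
local gconf = { lrate = 1, wcost = 1e-6 }

local function update_bias(bias, grad)
    local l2 = 1 - gconf.lrate * gconf.wcost   -- weight-decay factor
    for i = 1, #bias do
        -- shrink the old value by l2, then take the gradient step
        bias[i] = l2 * bias[i] - gconf.lrate * grad[i]
    end
    return bias
end

local b = update_bias({0.5, -0.2}, {0.1, 0.0})
print(b[1], b[2])  -- each bias is decayed slightly before the gradient step
```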
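The layer_dag_t.lua hunks all apply the same fix: the per-timestep tables (`inputs[t]`, `outputs[t]`, `err_inputs[t]`, `err_outputs[t]`) are created lazily before a port is written, so indexing an unseen timestep no longer hits a nil table. A minimal sketch of that pattern, with a hypothetical `set_port` helper and `ports` table standing in for the DAGLayerT bookkeeping:

```lua
-- Lazy per-timestep initialisation, as added in DAGLayerT:set_* above.
-- `ports` and `set_port` are illustrative names, not nerv API.
local function set_port(ports, t, port, value)
    if ports[t] == nil then   -- first touch of timestep t: create its table
        ports[t] = {}
    end
    ports[t][port] = value
end

local inputs = {}
set_port(inputs, 3, 1, "matrix_at_t3_port1")
print(inputs[3][1])  -- no "attempt to index a nil value" on a new timestep
```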