author     txh18 <[email protected]>    2015-11-12 17:13:35 +0800
committer  txh18 <[email protected]>    2015-11-12 17:13:35 +0800
commit     c56722702c099a6d4b3ea1599836e6226bdccc46 (patch)
tree       d6d7c741e69ac1c33d34c3da691da7def416012e
parent     ae2bb39ec6ea46a8bdfbd3b8b145ecfb7ca9032f (diff)
cleaning files...
-rw-r--r--  nerv/examples/lmptb/lm_trainer.lua                                                    89
-rw-r--r--  nerv/examples/lmptb/rnn/tnn.lua                                                       31
-rw-r--r--  nerv/examples/lmptb/tnn_ptb_main.lua (renamed from nerv/examples/lmptb/m-tests/tnn_test.lua)  126
-rw-r--r--  nerv/examples/lmptb/unfold_ptb_main.lua (renamed from nerv/examples/lmptb/main.lua)             5
4 files changed, 127 insertions, 124 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
new file mode 100644
index 0000000..d34634c
--- /dev/null
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -0,0 +1,89 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+require 'lmptb.layer.init'
+require 'rnn.init'
+require 'lmptb.lmseqreader'
+
+local LMTrainer = nerv.class('nerv.LMTrainer')
+
+local printf = nerv.printf
+
+--Returns: LMResult
+function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
+    local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
+    reader:open_file(fn)
+    local result = nerv.LMResult(global_conf, global_conf.vocab)
+    result:init("rnn")
+
+    tnn:flush_all() --caution: will also flush the inputs from the reader!
+
+    local next_log_wcn = global_conf.log_w_num
+
+    while (1) do
+        local r, feeds
+
+        r, feeds = tnn:getFeedFromReader(reader)
+        if (r == false) then break end
+
+        for t = 1, global_conf.chunk_size do
+            tnn.err_inputs_m[t][1]:fill(1)
+            for i = 1, global_conf.batch_size do
+                if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
+                    tnn.err_inputs_m[t][1][i - 1][0] = 0
+                end
+            end
+        end
+
+        --[[
+        for j = 1, global_conf.chunk_size, 1 do
+            for i = 1, global_conf.batch_size, 1 do
+                printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
+            end
+            printf("\n")
+        end
+        printf("\n")
+        ]]--
+
+        tnn:net_propagate()
+
+        if (do_train == true) then
+            tnn:net_backpropagate(false)
+            tnn:net_backpropagate(true)
+        end
+
+        for t = 1, global_conf.chunk_size, 1 do
+            for i = 1, global_conf.batch_size, 1 do
+                if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
+                    result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
+                end
+            end
+        end
+        if (result["rnn"].cn_w > next_log_wcn) then
+            next_log_wcn = next_log_wcn + global_conf.log_w_num
+            printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+            printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
+            nerv.LMUtil.wait(0.1)
+        end
+
+        --[[
+        for t = 1, global_conf.chunk_size do
+            print(tnn.outputs_m[t][1])
+        end
+        ]]--
+
+        tnn:moveRightToNextMB()
+
+        collectgarbage("collect")
+
+        --break --debug
+    end
+
+    printf("%s Displaying result:\n", global_conf.sche_log_pre)
+    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+    printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
+
+    return result
+end
+
+
diff --git a/nerv/examples/lmptb/rnn/tnn.lua b/nerv/examples/lmptb/rnn/tnn.lua
index fc5321d..9850fe5 100644
--- a/nerv/examples/lmptb/rnn/tnn.lua
+++ b/nerv/examples/lmptb/rnn/tnn.lua
@@ -1,5 +1,4 @@
 local TNN = nerv.class("nerv.TNN", "nerv.Layer")
-local DAGLayer = TNN
 
 local function parse_id(str) --used to parse layerid[portid],time
@@ -91,7 +90,7 @@ end
 
 function TNN:__init(id, global_conf, layer_conf)
     local layers = {}
-    local inputs_p = {} --map:port of the TDAGLayer to layer ref and port
+    local inputs_p = {} --map:port of the TNN to layer ref and port
     local outputs_p = {}
     local dim_in = layer_conf.dim_in
     local dim_out = layer_conf.dim_out
@@ -394,7 +393,6 @@ function TNN:propagate_dfs(ref, t)
         if (seq_start > 0 or seq_end > 0) then
             for p, conn in pairs(ref.o_conns_p) do
                 if ((ref.o_conns_p[p].time > 0 and seq_end > 0) or (ref.o_conns_p[p].time < 0 and seq_start > 0)) then
-                    self.gconf.fz2 = self.gconf.fz2 + 1
                     ref.outputs_m[t][p][i - 1]:fill(self.gconf.nn_act_default)
                 end
             end
@@ -502,7 +500,6 @@ function TNN:backpropagate_dfs(ref, t, do_update)
         if (seq_start > 0 or seq_end > 0) then
             for p, conn in pairs(ref.i_conns_p) do
                 if ((ref.i_conns_p[p].time > 0 and seq_start > 0) or (ref.i_conns_p[p].time < 0 and seq_end > 0)) then --cross-border, set to zero
-                    self.gconf.fz = self.gconf.fz + 1
                     ref.err_outputs_m[t][p][i - 1]:fill(0)
                 end
             end
@@ -534,29 +531,3 @@ function TNN:get_params()
     return nerv.ParamRepo.merge(param_repos)
 end
 
-DAGLayer.PORT_TYPES = {
-    INPUT = {},
-    OUTPUT = {},
-    ERR_INPUT = {},
-    ERR_OUTPUT = {}
-}
-
-function DAGLayer:get_intermediate(id, port_type)
-    if id == "<input>" or id == "<output>" then
-        nerv.error("an actual real layer id is expected")
-    end
-    local layer = self.layers[id]
-    if layer == nil then
-        nerv.error("layer id %s not found", id)
-    end
-    if port_type == DAGLayer.PORT_TYPES.INPUT then
-        return layer.inputs
-    elseif port_type == DAGLayer.PORT_TYPES.OUTPUT then
-        return layer.outputs
-    elseif port_type == DAGLayer.PORT_TYPES.ERR_INPUT then
-        return layer.err_inputs
-    elseif port_type == DAGLayer.PORT_TYPES.ERR_OUTPUT then
-        return layer.err_outputs
-    end
-    nerv.error("unrecognized port type")
-end
diff --git a/nerv/examples/lmptb/m-tests/tnn_test.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index e2c0d39..803ae68 100644
--- a/nerv/examples/lmptb/m-tests/tnn_test.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -4,9 +4,11 @@ require 'lmptb.lmutil'
 require 'lmptb.layer.init'
 require 'rnn.init'
 require 'lmptb.lmseqreader'
+require 'lm_trainer'
 
 --[[global function rename]]--
-printf = nerv.printf
+local printf = nerv.printf
+local LMTrainer = nerv.LMTrainer
 --[[global function rename ends]]--
 
 --global_conf: table
@@ -144,89 +146,6 @@ function load_net(global_conf)
     return tnn, paramRepo
 end
 
---Returns: LMResult
-function lm_process_file(global_conf, fn, tnn, do_train)
-    local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
-    reader:open_file(fn)
-    local result = nerv.LMResult(global_conf, global_conf.vocab)
-    result:init("rnn")
-
-    tnn:flush_all() --caution: will also flush the inputs from the reader!
-
-    local next_log_wcn = global_conf.log_w_num
-
-    global_conf.fz = 0
-    global_conf.fz2 = 0
-
-    while (1) do
-        local r, feeds
-
-        r, feeds = tnn:getFeedFromReader(reader)
-        if (r == false) then break end
-
-        for t = 1, global_conf.chunk_size do
-            tnn.err_inputs_m[t][1]:fill(1)
-            for i = 1, global_conf.batch_size do
-                if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
-                    tnn.err_inputs_m[t][1][i - 1][0] = 0
-                end
-            end
-        end
-
-        --[[
-        for j = 1, global_conf.chunk_size, 1 do
-            for i = 1, global_conf.batch_size, 1 do
-                printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
-            end
-            printf("\n")
-        end
-        printf("\n")
-        ]]--
-
-        tnn:net_propagate()
-
-        if (do_train == true) then
-            tnn:net_backpropagate(false)
-            tnn:net_backpropagate(true)
-        end
-
-        for t = 1, global_conf.chunk_size, 1 do
-            for i = 1, global_conf.batch_size, 1 do
-                if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
-                    result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
-                end
-            end
-        end
-        if (result["rnn"].cn_w > next_log_wcn) then
-            next_log_wcn = next_log_wcn + global_conf.log_w_num
-            printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
-            printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
-            nerv.LMUtil.wait(0.1)
-        end
-
-        --[[
-        for t = 1, global_conf.chunk_size do
-            print(tnn.outputs_m[t][1])
-        end
-        ]]--
-
-        tnn:moveRightToNextMB()
-
-        collectgarbage("collect")
-
-        --break --debug
-    end
-
-    print("gconf.fz", global_conf.fz)
-    print("gconf.fz2", global_conf.fz2)
-
-    printf("%s Displaying result:\n", global_conf.sche_log_pre)
-    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
-    printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
-
-    return result
-end
-
 local train_fn, valid_fn, test_fn, global_conf
 local set = arg[1] --"test"
@@ -290,6 +209,12 @@ global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
 
+printf("%s printing global_conf\n", global_conf.sche_log_pre)
+for id, value in pairs(global_conf) do
+    print(id, value)
+end
+nerv.LMUtil.wait(2)
+
 printf("%s creating work_dir...\n", global_conf.sche_log_pre)
 os.execute("mkdir -p "..global_conf.work_dir)
 os.execute("cp " .. global_conf.train_fn .. " " .. global_conf.train_fn_shuf)
@@ -302,11 +227,15 @@ prepare_parameters(global_conf, true) --randomly generate parameters
 
 print("===INITIAL VALIDATION===")
 local tnn, paramRepo = load_net(global_conf)
-local result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
 nerv.LMUtil.wait(3)
 ppl_rec = {}
 lr_rec = {}
-ppl_rec[0] = result:ppl_net("rnn") ppl_last = ppl_rec[0]
+ppl_rec[0] = {}
+ppl_rec[0].valid = result:ppl_net("rnn")
+ppl_last = ppl_rec[0].valid
+ppl_rec[0].train = 0
+ppl_rec[0].test = 0
 lr_rec[0] = 0
 print()
 local lr_half = false
@@ -314,33 +243,42 @@ for iter = 1, global_conf.max_iter, 1 do
     tnn, paramRepo = load_net(global_conf)
     printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
     global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
-    lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+    result = LMTrainer.lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+    ppl_rec[iter] = {}
+    ppl_rec[iter].train = result:ppl_net("rnn")
     --shuffling training file
+    printf("%s shuffling training file\n", global_conf.sche_log_pre)
     os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
     os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+    printf("===PEEK ON TEST %d===\n", iter)
+    result = LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+    ppl_rec[iter].test = result:ppl_net("rnn")
     printf("===VALIDATION %d===\n", iter)
-    result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    ppl_rec[iter] = result:ppl_net("rnn")
+    result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+    ppl_rec[iter].valid = result:ppl_net("rnn")
     lr_rec[iter] = global_conf.lrate
-    if (ppl_last / ppl_rec[iter] < 1.0003 or lr_half == true) then
+    if (ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) then
         global_conf.lrate = (global_conf.lrate * 0.6)
         lr_half = true
     end
-    if (ppl_rec[iter] < ppl_last) then
+    if (ppl_rec[iter].valid < ppl_last) then
         printf("%s saving net to file %s...\n", global_conf.sche_log_pre, global_conf.param_fn)
         paramRepo:export(global_conf.param_fn, nil)
-        ppl_last = ppl_rec[iter]
+        ppl_last = ppl_rec[iter].valid
     else
         printf("%s PPL did not improve, rejected...\n", global_conf.sche_log_pre)
+        if (lr_halg == true) then
+            printf("%s LR is already halfing, end training...\n", global_conf.sche_log_pre)
+        end
     end
     printf("\n")
     nerv.LMUtil.wait(2)
 end
 printf("===VALIDATION PPL record===\n")
-for i = 0, #ppl_rec do printf("<ITER%d LR%.5f: %.3f> ", i, lr_rec[i], ppl_rec[i]) end
+for i = 0, #ppl_rec do printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, lr_rec[i], ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test) end
 printf("\n")
 printf("===FINAL TEST===\n")
 global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
 tnn, paramRepo = load_net(global_conf)
-lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/unfold_ptb_main.lua
index a93c148..6c4ead3 100644
--- a/nerv/examples/lmptb/main.lua
+++ b/nerv/examples/lmptb/unfold_ptb_main.lua
@@ -1,3 +1,8 @@
+--author: txh18(Tianxing)
+--This recipe is rnnlm with bptt, unfolding for each time instance
+--The training framework is the same with Mikolov's rnnlm, Tianxing's XRNN-CPU and Wengong's XRNN-GPU
+--It uses DAGLayer to simulate RNNLM unfold
+
 --TODO: the select_linear now accepts a column vector, instead of a row vector
 require 'lmptb.lmvocab'
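
The net effect of the commit is that lm_process_file now lives in the new nerv.LMTrainer module and the driver scripts only call into it. Below is a minimal usage sketch (not part of the commit), assuming global_conf, load_net and the train/valid file paths are already prepared as in tnn_ptb_main.lua; all identifiers are taken from the diff above.

    -- Sketch only: drive the extracted trainer the way tnn_ptb_main.lua does.
    require 'lm_trainer'                  -- defines nerv.LMTrainer (see lm_trainer.lua above)
    local printf = nerv.printf
    local LMTrainer = nerv.LMTrainer

    local tnn, paramRepo = load_net(global_conf)

    -- Training pass: do_train = true, so the backpropagation calls also update parameters.
    local result = LMTrainer.lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true)
    printf("train PPL: %.3f\n", result:ppl_net("rnn"))

    -- Evaluation pass: do_train = false, only the LMResult statistics are accumulated.
    result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false)
    printf("valid PPL: %.3f\n", result:ppl_net("rnn"))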