-rw-r--r--  nerv/examples/lmptb/lm_trainer.lua                                                        89
-rw-r--r--  nerv/examples/lmptb/rnn/tnn.lua                                                           31
-rw-r--r--  nerv/examples/lmptb/tnn_ptb_main.lua (renamed from nerv/examples/lmptb/m-tests/tnn_test.lua)  126
-rw-r--r--  nerv/examples/lmptb/unfold_ptb_main.lua (renamed from nerv/examples/lmptb/main.lua)             5
4 files changed, 127 insertions, 124 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
new file mode 100644
index 0000000..d34634c
--- /dev/null
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -0,0 +1,89 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+require 'lmptb.layer.init'
+require 'rnn.init'
+require 'lmptb.lmseqreader'
+
+local LMTrainer = nerv.class('nerv.LMTrainer')
+
+local printf = nerv.printf
+
+--Returns: LMResult
+function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
+ local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
+ reader:open_file(fn)
+ local result = nerv.LMResult(global_conf, global_conf.vocab)
+ result:init("rnn")
+
+ tnn:flush_all() --caution: will also flush the inputs from the reader!
+
+ local next_log_wcn = global_conf.log_w_num
+
+ while (1) do
+ local r, feeds
+
+ r, feeds = tnn:getFeedFromReader(reader)
+ if (r == false) then break end
+
+ for t = 1, global_conf.chunk_size do
+ tnn.err_inputs_m[t][1]:fill(1)
+ for i = 1, global_conf.batch_size do
+ if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
+ tnn.err_inputs_m[t][1][i - 1][0] = 0
+ end
+ end
+ end
+
+ --[[
+ for j = 1, global_conf.chunk_size, 1 do
+ for i = 1, global_conf.batch_size, 1 do
+ printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
+ end
+ printf("\n")
+ end
+ printf("\n")
+ ]]--
+
+ tnn:net_propagate()
+
+ if (do_train == true) then
+ tnn:net_backpropagate(false)
+ tnn:net_backpropagate(true)
+ end
+
+ for t = 1, global_conf.chunk_size, 1 do
+ for i = 1, global_conf.batch_size, 1 do
+ if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
+ result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
+ end
+ end
+ end
+ if (result["rnn"].cn_w > next_log_wcn) then
+ next_log_wcn = next_log_wcn + global_conf.log_w_num
+ printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+ printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
+ nerv.LMUtil.wait(0.1)
+ end
+
+ --[[
+ for t = 1, global_conf.chunk_size do
+ print(tnn.outputs_m[t][1])
+ end
+ ]]--
+
+ tnn:moveRightToNextMB()
+
+ collectgarbage("collect")
+
+ --break --debug
+ end
+
+ printf("%s Displaying result:\n", global_conf.sche_log_pre)
+ printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+ printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
+
+ return result
+end
+
+
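For reference, a minimal sketch of how this new entry point is driven; it assumes a populated global_conf and the load_net helper defined in tnn_ptb_main.lua below, and is illustrative only:

require 'lm_trainer'

local printf = nerv.printf
local LMTrainer = nerv.LMTrainer

--build the TNN and its parameter repo from global_conf (helper from tnn_ptb_main.lua)
local tnn, paramRepo = load_net(global_conf)

--do_train = true updates the parameters; false only accumulates perplexity
local train_res = LMTrainer.lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true)
local valid_res = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false)

printf("train PPL %.3f, valid PPL %.3f\n",
    train_res:ppl_net("rnn"), valid_res:ppl_net("rnn"))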
diff --git a/nerv/examples/lmptb/rnn/tnn.lua b/nerv/examples/lmptb/rnn/tnn.lua
index fc5321d..9850fe5 100644
--- a/nerv/examples/lmptb/rnn/tnn.lua
+++ b/nerv/examples/lmptb/rnn/tnn.lua
@@ -1,5 +1,4 @@
local TNN = nerv.class("nerv.TNN", "nerv.Layer")
-local DAGLayer = TNN
local function parse_id(str)
--used to parse layerid[portid],time
@@ -91,7 +90,7 @@ end
function TNN:__init(id, global_conf, layer_conf)
local layers = {}
- local inputs_p = {} --map:port of the TDAGLayer to layer ref and port
+ local inputs_p = {} --map:port of the TNN to layer ref and port
local outputs_p = {}
local dim_in = layer_conf.dim_in
local dim_out = layer_conf.dim_out
@@ -394,7 +393,6 @@ function TNN:propagate_dfs(ref, t)
if (seq_start > 0 or seq_end > 0) then
for p, conn in pairs(ref.o_conns_p) do
if ((ref.o_conns_p[p].time > 0 and seq_end > 0) or (ref.o_conns_p[p].time < 0 and seq_start > 0)) then
- self.gconf.fz2 = self.gconf.fz2 + 1
ref.outputs_m[t][p][i - 1]:fill(self.gconf.nn_act_default)
end
end
@@ -502,7 +500,6 @@ function TNN:backpropagate_dfs(ref, t, do_update)
if (seq_start > 0 or seq_end > 0) then
for p, conn in pairs(ref.i_conns_p) do
if ((ref.i_conns_p[p].time > 0 and seq_start > 0) or (ref.i_conns_p[p].time < 0 and seq_end > 0)) then --cross-border, set to zero
- self.gconf.fz = self.gconf.fz + 1
ref.err_outputs_m[t][p][i - 1]:fill(0)
end
end
@@ -534,29 +531,3 @@ function TNN:get_params()
return nerv.ParamRepo.merge(param_repos)
end
-DAGLayer.PORT_TYPES = {
- INPUT = {},
- OUTPUT = {},
- ERR_INPUT = {},
- ERR_OUTPUT = {}
-}
-
-function DAGLayer:get_intermediate(id, port_type)
- if id == "<input>" or id == "<output>" then
- nerv.error("an actual real layer id is expected")
- end
- local layer = self.layers[id]
- if layer == nil then
- nerv.error("layer id %s not found", id)
- end
- if port_type == DAGLayer.PORT_TYPES.INPUT then
- return layer.inputs
- elseif port_type == DAGLayer.PORT_TYPES.OUTPUT then
- return layer.outputs
- elseif port_type == DAGLayer.PORT_TYPES.ERR_INPUT then
- return layer.err_inputs
- elseif port_type == DAGLayer.PORT_TYPES.ERR_OUTPUT then
- return layer.err_outputs
- end
- nerv.error("unrecognized port type")
-end
diff --git a/nerv/examples/lmptb/m-tests/tnn_test.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index e2c0d39..803ae68 100644
--- a/nerv/examples/lmptb/m-tests/tnn_test.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -4,9 +4,11 @@ require 'lmptb.lmutil'
require 'lmptb.layer.init'
require 'rnn.init'
require 'lmptb.lmseqreader'
+require 'lm_trainer'
--[[global function rename]]--
-printf = nerv.printf
+local printf = nerv.printf
+local LMTrainer = nerv.LMTrainer
--[[global function rename ends]]--
--global_conf: table
@@ -144,89 +146,6 @@ function load_net(global_conf)
return tnn, paramRepo
end
---Returns: LMResult
-function lm_process_file(global_conf, fn, tnn, do_train)
- local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
- reader:open_file(fn)
- local result = nerv.LMResult(global_conf, global_conf.vocab)
- result:init("rnn")
-
- tnn:flush_all() --caution: will also flush the inputs from the reader!
-
- local next_log_wcn = global_conf.log_w_num
-
- global_conf.fz = 0
- global_conf.fz2 = 0
-
- while (1) do
- local r, feeds
-
- r, feeds = tnn:getFeedFromReader(reader)
- if (r == false) then break end
-
- for t = 1, global_conf.chunk_size do
- tnn.err_inputs_m[t][1]:fill(1)
- for i = 1, global_conf.batch_size do
- if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
- tnn.err_inputs_m[t][1][i - 1][0] = 0
- end
- end
- end
-
- --[[
- for j = 1, global_conf.chunk_size, 1 do
- for i = 1, global_conf.batch_size, 1 do
- printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
- end
- printf("\n")
- end
- printf("\n")
- ]]--
-
- tnn:net_propagate()
-
- if (do_train == true) then
- tnn:net_backpropagate(false)
- tnn:net_backpropagate(true)
- end
-
- for t = 1, global_conf.chunk_size, 1 do
- for i = 1, global_conf.batch_size, 1 do
- if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
- result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
- end
- end
- end
- if (result["rnn"].cn_w > next_log_wcn) then
- next_log_wcn = next_log_wcn + global_conf.log_w_num
- printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
- printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
- nerv.LMUtil.wait(0.1)
- end
-
- --[[
- for t = 1, global_conf.chunk_size do
- print(tnn.outputs_m[t][1])
- end
- ]]--
-
- tnn:moveRightToNextMB()
-
- collectgarbage("collect")
-
- --break --debug
- end
-
- print("gconf.fz", global_conf.fz)
- print("gconf.fz2", global_conf.fz2)
-
- printf("%s Displaying result:\n", global_conf.sche_log_pre)
- printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
- printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
-
- return result
-end
-
local train_fn, valid_fn, test_fn, global_conf
local set = arg[1] --"test"
@@ -290,6 +209,12 @@ global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
+printf("%s printing global_conf\n", global_conf.sche_log_pre)
+for id, value in pairs(global_conf) do
+ print(id, value)
+end
+nerv.LMUtil.wait(2)
+
printf("%s creating work_dir...\n", global_conf.sche_log_pre)
os.execute("mkdir -p "..global_conf.work_dir)
os.execute("cp " .. global_conf.train_fn .. " " .. global_conf.train_fn_shuf)
@@ -302,11 +227,15 @@ prepare_parameters(global_conf, true) --randomly generate parameters
print("===INITIAL VALIDATION===")
local tnn, paramRepo = load_net(global_conf)
-local result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
nerv.LMUtil.wait(3)
ppl_rec = {}
lr_rec = {}
-ppl_rec[0] = result:ppl_net("rnn") ppl_last = ppl_rec[0]
+ppl_rec[0] = {}
+ppl_rec[0].valid = result:ppl_net("rnn")
+ppl_last = ppl_rec[0].valid
+ppl_rec[0].train = 0
+ppl_rec[0].test = 0
lr_rec[0] = 0
print()
local lr_half = false
@@ -314,33 +243,42 @@ for iter = 1, global_conf.max_iter, 1 do
tnn, paramRepo = load_net(global_conf)
printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
- lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+ result = LMTrainer.lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+ ppl_rec[iter] = {}
+ ppl_rec[iter].train = result:ppl_net("rnn")
--shuffling training file
+ printf("%s shuffling training file\n", global_conf.sche_log_pre)
os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+ printf("===PEEK ON TEST %d===\n", iter)
+ result = LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+ ppl_rec[iter].test = result:ppl_net("rnn")
printf("===VALIDATION %d===\n", iter)
- result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
- ppl_rec[iter] = result:ppl_net("rnn")
+ result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+ ppl_rec[iter].valid = result:ppl_net("rnn")
lr_rec[iter] = global_conf.lrate
- if (ppl_last / ppl_rec[iter] < 1.0003 or lr_half == true) then
+ if (ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) then
global_conf.lrate = (global_conf.lrate * 0.6)
lr_half = true
end
- if (ppl_rec[iter] < ppl_last) then
+ if (ppl_rec[iter].valid < ppl_last) then
printf("%s saving net to file %s...\n", global_conf.sche_log_pre, global_conf.param_fn)
paramRepo:export(global_conf.param_fn, nil)
- ppl_last = ppl_rec[iter]
+ ppl_last = ppl_rec[iter].valid
else
printf("%s PPL did not improve, rejected...\n", global_conf.sche_log_pre)
+ if (lr_half == true) then
+ printf("%s LR is already halving, ending training...\n", global_conf.sche_log_pre)
+ end
end
printf("\n")
nerv.LMUtil.wait(2)
end
printf("===VALIDATION PPL record===\n")
-for i = 0, #ppl_rec do printf("<ITER%d LR%.5f: %.3f> ", i, lr_rec[i], ppl_rec[i]) end
+for i = 0, #ppl_rec do printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, lr_rec[i], ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test) end
printf("\n")
printf("===FINAL TEST===\n")
global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
tnn, paramRepo = load_net(global_conf)
-lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/unfold_ptb_main.lua
index a93c148..6c4ead3 100644
--- a/nerv/examples/lmptb/main.lua
+++ b/nerv/examples/lmptb/unfold_ptb_main.lua
@@ -1,3 +1,8 @@
+--author: txh18 (Tianxing)
+--This recipe is an RNNLM trained with BPTT, unfolded at each time step
+--The training framework is the same as Mikolov's rnnlm, Tianxing's XRNN-CPU and Wengong's XRNN-GPU
+--It uses DAGLayer to simulate the RNNLM unfolding
+
--TODO: the select_linear now accepts a column vector, instead of a row vector
require 'lmptb.lmvocab'