-rw-r--r--  nerv/examples/lmptb/lstmlm_ptb_main.lua    | 225
-rw-r--r--  nerv/examples/lmptb/m-tests/sutil_test.lua |   8
-rw-r--r--  nerv/tnn/sutil.lua                         |  12
3 files changed, 153 insertions(+), 92 deletions(-)
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 4123378..333fa96 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -74,12 +74,12 @@ function prepare_layers(global_conf)
local du = false
--local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
- local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
+ --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
local layers = {
- ["nerv.AffineRecurrentLayer"] = {
- ["recurrentL1"] = recurrentLconfig,
- },
+ --["nerv.AffineRecurrentLayer"] = {
+ -- ["recurrentL1"] = recurrentLconfig,
+ --},
["nerv.LSTMLayerT"] = {
["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}},
@@ -93,12 +93,12 @@ function prepare_layers(global_conf)
["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
},
- ["nerv.SigmoidLayer"] = {
- ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
- },
+ --["nerv.SigmoidLayer"] = {
+ -- ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+ --},
["nerv.CombinerLayer"] = {
- ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
+ ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}},
},
["nerv.AffineLayer"] = {
@@ -109,8 +109,13 @@ function prepare_layers(global_conf)
["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
},
}
-
- --[[ --we do not need those in the new rnn framework
+
+ for l = 2, global_conf.layer_num do
+ layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+ layers["nerv.LSTMLayerT"]["lstmL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}
+ layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
+ end
+ --[[ --we do not need those in the new tnn framework
printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
for i = 1, global_conf.bptt do
layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
@@ -145,12 +150,20 @@ function prepare_tnn(global_conf, layerRepo)
{"dropoutL1[1]", "combinerL1[1]", 0},
{"combinerL1[1]", "lstmL1[2]", 1},
- {"combinerL1[2]", "outputL[1]", 0},
+ {"combinerL"..global_conf.layer_num.."[2]", "outputL[1]", 0},
{"outputL[1]", "softmaxL[1]", 0},
{"<input>[2]", "softmaxL[2]", 0},
{"softmaxL[1]", "<output>[1]", 0}
}
+ for l = 2, global_conf.layer_num do
+ table.insert(connections_t, {"combinerL"..(l-1).."[2]", "lstmL"..l.."[1]", 0})
+ table.insert(connections_t, {"lstmL"..l.."[2]", "lstmL"..l.."[3]", 1})
+ table.insert(connections_t, {"lstmL"..l.."[1]", "dropoutL"..l.."[1]", 0})
+ table.insert(connections_t, {"dropoutL"..l.."[1]", "combinerL"..l.."[1]", 0})
+ table.insert(connections_t, {"combinerL"..l.."[1]", "lstmL"..l.."[2]", 1})
+ end
+
--[[
printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
for key, value in pairs(connections_t) do
@@ -189,15 +202,17 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
vocab_fn = data_dir .. '/vocab'
global_conf = {
- lrate = 0.1, wcost = 1e-5, momentum = 0, clip_t = 10,
+ lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 10,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
- hidden_size = 300, --set to 400 for a stable good test PPL
+ hidden_size = 600,
+ layer_num = 2,
chunk_size = 15,
- batch_size = 10,
+ batch_size = 20,
max_iter = 45,
+ lr_decay = 1.003,
decay_iter = 10,
param_random = function() return (math.random() / 5 - 0.1) end,
dropout_str = "0.5",
@@ -227,11 +242,14 @@ global_conf = {
nn_act_default = 0,
hidden_size = 300,
+ layer_num = 1,
chunk_size = 15,
batch_size = 10,
max_iter = 30,
decay_iter = 10,
+ lr_decay = 1.003,
param_random = function() return (math.random() / 5 - 0.1) end,
+ dropout_str = "0",
train_fn = train_fn,
valid_fn = valid_fn,
@@ -251,20 +269,24 @@ test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/so
vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text'
global_conf = {
- lrate = 1, wcost = 1e-5, momentum = 0,
+ lrate = 0.01, wcost = 1e-5, momentum = 0,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
hidden_size = 20,
+ layer_num = 1,
chunk_size = 2,
batch_size = 10,
max_iter = 3,
param_random = function() return (math.random() / 5 - 0.1) end,
+ dropout_str = "0",
train_fn = train_fn,
valid_fn = valid_fn,
test_fn = test_fn,
+ lr_decay = 1.003,
+ decay_iter = 10,
vocab_fn = vocab_fn,
sche_log_pre = "[SCHEDULER]:",
log_w_num = 10, --give a message when log_w_num words have been processed
@@ -274,9 +296,13 @@ global_conf = {
end
-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-ppl_last = 100000
+local lr_half = false --cannot stay local if it is to be set by loadstring(arg[2]) below, since loadstring assigns globals
+local start_iter = -1
+local ppl_last = 100000
+local commands_str = "train:test"
+local commands = {}
+local test_iter = -1
+
if (arg[2] ~= nil) then
printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
loadstring(arg[2])()
@@ -285,12 +311,12 @@ else
printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
end
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str)
-global_conf.dropout_rate = 0
+commands = nerv.SUtil.parse_commands_set(commands_str)
----------------printing options---------------------------------
printf("%s printing global_conf...\n", global_conf.sche_log_pre)
for id, value in pairs(global_conf) do
@@ -301,6 +327,8 @@ printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
print("lr_half", lr_half)
print("start_iter", start_iter)
print("ppl_last", ppl_last)
+print("commds_str", commands_str)
+print("test_iter", test_iter)
printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
nerv.LMUtil.wait(2)
------------------printing options end------------------------------
@@ -317,77 +345,92 @@ printf("%s building vocab...\n", global_conf.sche_log_pre)
global_conf.vocab:build_file(global_conf.vocab_fn, false)
ppl_rec = {}
-if start_iter == -1 then
- prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
-end
-
-if start_iter == -1 or start_iter == 0 then
- print("===INITIAL VALIDATION===")
- local tnn = load_net(global_conf, 0)
- global_conf.paramRepo = tnn:get_params() --get auto-generted params
- global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
- local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
- nerv.LMUtil.wait(1)
- ppl_rec[0] = {}
- ppl_rec[0].valid = result:ppl_all("rnn")
- ppl_last = ppl_rec[0].valid
- ppl_rec[0].train = 0
- ppl_rec[0].test = 0
- ppl_rec[0].lr = 0
-
- start_iter = 1
-
- print()
-end
-
-local final_iter
-for iter = start_iter, global_conf.max_iter, 1 do
- final_iter = iter --for final testing
- global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
- tnn = load_net(global_conf, iter - 1)
- printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
- global_conf.dropout_rate = nerv.SUtil.sche_get(global_conf.dropout_list, iter)
- result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
- global_conf.dropout_rate = 0
- ppl_rec[iter] = {}
- ppl_rec[iter].train = result:ppl_all("rnn")
- --shuffling training file
- printf("%s shuffling training file\n", global_conf.sche_log_pre)
- os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
- os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
- printf("===PEEK ON TEST %d===\n", iter)
- result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
- ppl_rec[iter].test = result:ppl_all("rnn")
- printf("===VALIDATION %d===\n", iter)
- result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
- ppl_rec[iter].valid = result:ppl_all("rnn")
- ppl_rec[iter].lr = global_conf.lrate
- if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then
- global_conf.lrate = (global_conf.lrate * 0.6)
- end
- if ppl_rec[iter].valid < ppl_last then
- printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
- global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
- else
- printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
- os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
+local final_iter = -1
+if commands["train"] == 1 then
+ if start_iter == -1 then
+ prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
+ end
+
+ if start_iter == -1 or start_iter == 0 then
+ print("===INITIAL VALIDATION===")
+ local tnn = load_net(global_conf, 0)
+ global_conf.paramRepo = tnn:get_params() --get auto-generated params
+ global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
+ global_conf.dropout_rate = 0
+ local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+ nerv.LMUtil.wait(1)
+ ppl_rec[0] = {}
+ ppl_rec[0].valid = result:ppl_all("rnn")
+ ppl_last = ppl_rec[0].valid
+ ppl_rec[0].train = 0
+ ppl_rec[0].test = 0
+ ppl_rec[0].lr = 0
+
+ start_iter = 1
+
+ print()
+ end
+
+ for iter = start_iter, global_conf.max_iter, 1 do
+ final_iter = iter --for final testing
+ global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
+ tnn = load_net(global_conf, iter - 1)
+ printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
+ global_conf.dropout_rate = nerv.SUtil.sche_get(global_conf.dropout_list, iter)
+ result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+ global_conf.dropout_rate = 0
+ ppl_rec[iter] = {}
+ ppl_rec[iter].train = result:ppl_all("rnn")
+ --shuffling training file
+ printf("%s shuffling training file\n", global_conf.sche_log_pre)
+ os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
+ os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+ printf("===PEEK ON TEST %d===\n", iter)
+ result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+ ppl_rec[iter].test = result:ppl_all("rnn")
+ printf("===VALIDATION %d===\n", iter)
+ result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+ ppl_rec[iter].valid = result:ppl_all("rnn")
+ ppl_rec[iter].lr = global_conf.lrate
+ if ((ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true) and iter > global_conf.decay_iter) then
+ global_conf.lrate = (global_conf.lrate * 0.6)
+ end
+ if ppl_rec[iter].valid < ppl_last then
+ printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
+ global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
+ else
+ printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
+ os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
+ end
+ if ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true then
+ lr_half = true
+ end
+ if ppl_rec[iter].valid < ppl_last then
+ ppl_last = ppl_rec[iter].valid
+ end
+ printf("\n")
+ nerv.LMUtil.wait(2)
+ end
+ nerv.info("saving final nn to param.final")
+ os.execute('cp ' .. global_conf.param_fn .. '.' .. tostring(final_iter) .. ' ' .. global_conf.param_fn .. '.final')
+
+ printf("===VALIDATION PPL record===\n")
+ for i, _ in pairs(ppl_rec) do
+ printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
end
- if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then
- lr_half = true
+ printf("\n")
+end --if commands["train"]
+
+if commands["test"] == 1 then
+ printf("===FINAL TEST===\n")
+ global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
+ if final_iter ~= -1 and test_iter == -1 then
+ test_iter = final_iter
end
- if ppl_rec[iter].valid < ppl_last then
- ppl_last = ppl_rec[iter].valid
+ if test_iter == -1 then
+ test_iter = "final"
end
- printf("\n")
- nerv.LMUtil.wait(2)
-end
-printf("===VALIDATION PPL record===\n")
-for i, _ in pairs(ppl_rec) do
- printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
-end
-printf("\n")
-printf("===FINAL TEST===\n")
-global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
-tnn = load_net(global_conf, final_iter)
-LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-
+ tnn = load_net(global_conf, test_iter)
+ global_conf.dropout_rate = 0
+ LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+end --if commands["test"]
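
For reference, the core of this change is the new layer_num option: prepare_layers now builds one lstmL/dropoutL/combinerL trio per layer, and prepare_tnn chains them so that only combinerL<layer_num>[2] feeds outputL, while each layer's cell and hidden states recur with a one-step delay. The hard-coded 1.0003 PPL threshold likewise becomes the configurable global_conf.lr_decay. A minimal standalone sketch, not part of the commit, of the connections the new loop emits for layer_num = 2 (the third tuple field is the time delay):

    -- connections generated by the loop body for l = 2
    local generated = {
        {"combinerL1[2]", "lstmL2[1]", 0},    -- layer 1 output feeds layer 2 input
        {"lstmL2[2]", "lstmL2[3]", 1},        -- layer 2 cell state, delayed one step
        {"lstmL2[1]", "dropoutL2[1]", 0},     -- layer 2 hidden state into dropout
        {"dropoutL2[1]", "combinerL2[1]", 0}, -- dropout output fans out via the combiner
        {"combinerL2[1]", "lstmL2[2]", 1},    -- layer 2 hidden state, delayed one step
    }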
diff --git a/nerv/examples/lmptb/m-tests/sutil_test.lua b/nerv/examples/lmptb/m-tests/sutil_test.lua
index 3f9bf9e..058de7e 100644
--- a/nerv/examples/lmptb/m-tests/sutil_test.lua
+++ b/nerv/examples/lmptb/m-tests/sutil_test.lua
@@ -1,4 +1,10 @@
--require "tnn.init"
-ss = "0.1*1:2"
+local ss = "0.1*1:2"
nerv.SUtil.parse_schedule(ss)
+ss = "train:test"
+local coms = nerv.SUtil.parse_commands_set(ss)
+print("!!!")
+for p, v in pairs(coms) do
+ print(p,v)
+end
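
Given parse_commands_set below (nerv/tnn/sutil.lua), this test should leave coms as a set-like table mapping each command name to 1; pairs() iterates the two entries in no guaranteed order. A quick check that could be appended to the test, assuming that behavior:

    assert(coms["train"] == 1 and coms["test"] == 1)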
diff --git a/nerv/tnn/sutil.lua b/nerv/tnn/sutil.lua
index f5bc408..d88bd8e 100644
--- a/nerv/tnn/sutil.lua
+++ b/nerv/tnn/sutil.lua
@@ -50,3 +50,15 @@ function Util.sche_get(s, it)
return s[#s]
end
end
+
+function Util.parse_commands_set(str)
+ local coms = {}
+ local s = Util.simple_split(str, ':,')
+ for i = 1 ,#s do
+ if coms[s[i]] == 1 then
+ nerv.warning("nerv.SUtil.parse_commands_set command(%s) appeared more than once in command_set(%s)", s[i], str)
+ end
+ coms[s[i]] = 1
+ end
+ return coms
+end
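
For reference, a minimal usage sketch mirroring how lstmlm_ptb_main.lua consumes the returned set (a duplicated command name only triggers the warning; the result is unchanged):

    local commands = nerv.SUtil.parse_commands_set("train:test")
    if commands["train"] == 1 then
        -- run the training schedule
    end
    if commands["test"] == 1 then
        -- run the final test
    end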