-rw-r--r--  nerv/examples/lmptb/lstmlm_ptb_main.lua  225
-rw-r--r--  nerv/examples/lmptb/m-tests/sutil_test.lua  8
-rw-r--r--  nerv/tnn/sutil.lua  12
3 files changed, 153 insertions, 92 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 4123378..333fa96 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -74,12 +74,12 @@ function prepare_layers(global_conf)
     local du = false
     --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
-    local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
+    --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}

     local layers = {
-        ["nerv.AffineRecurrentLayer"] = {
-            ["recurrentL1"] = recurrentLconfig,
-        },
+        --["nerv.AffineRecurrentLayer"] = {
+        --    ["recurrentL1"] = recurrentLconfig,
+        --},

         ["nerv.LSTMLayerT"] = {
             ["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}},
@@ -93,12 +93,12 @@ function prepare_layers(global_conf)
             ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
         },

-        ["nerv.SigmoidLayer"] = {
-            ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
-        },
+        --["nerv.SigmoidLayer"] = {
+        --    ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        --},

         ["nerv.CombinerLayer"] = {
-            ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
+            ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}},
         },

         ["nerv.AffineLayer"] = {
@@ -109,8 +109,13 @@ function prepare_layers(global_conf)
             ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
         },
     }
-
-    --[[ --we do not need those in the new rnn framework
+
+    for l = 2, global_conf.layer_num do
+        layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        layers["nerv.LSTMLayerT"]["lstmL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}
+        layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
+    end
+    --[[ --we do not need those in the new tnn framework
     printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
     for i = 1, global_conf.bptt do
         layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
@@ -145,12 +150,20 @@ function prepare_tnn(global_conf, layerRepo)
         {"dropoutL1[1]", "combinerL1[1]", 0},
         {"combinerL1[1]", "lstmL1[2]", 1},

-        {"combinerL1[2]", "outputL[1]", 0},
+        {"combinerL"..global_conf.layer_num.."[2]", "outputL[1]", 0},
         {"outputL[1]", "softmaxL[1]", 0},
         {"<input>[2]", "softmaxL[2]", 0},
         {"softmaxL[1]", "<output>[1]", 0}
     }

+    for l = 2, global_conf.layer_num do
+        table.insert(connections_t, {"combinerL"..(l-1).."[2]", "lstmL"..l.."[1]", 0})
+        table.insert(connections_t, {"lstmL"..l.."[2]", "lstmL"..l.."[3]", 1})
+        table.insert(connections_t, {"lstmL"..l.."[1]", "dropoutL"..l.."[1]", 0})
+        table.insert(connections_t, {"dropoutL"..l.."[1]", "combinerL"..l.."[1]", 0})
+        table.insert(connections_t, {"combinerL"..l.."[1]", "lstmL"..l.."[2]", 1})
+    end
+
     --[[
     printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
     for key, value in pairs(connections_t) do
@@ -189,15 +202,17 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
 vocab_fn = data_dir .. '/vocab'

 global_conf = {
-    lrate = 0.1, wcost = 1e-5, momentum = 0, clip_t = 10,
+    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 10,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,

-    hidden_size = 300, --set to 400 for a stable good test PPL
+    hidden_size = 600,
+    layer_num = 2,
     chunk_size = 15,
-    batch_size = 10,
+    batch_size = 20,
     max_iter = 45,
+    lr_decay = 1.003,
     decay_iter = 10,
     param_random = function() return (math.random() / 5 - 0.1) end,
     dropout_str = "0.5",
@@ -227,11 +242,14 @@ global_conf = {
     nn_act_default = 0,

     hidden_size = 300,
+    layer_num = 1,
     chunk_size = 15,
     batch_size = 10,
     max_iter = 30,
     decay_iter = 10,
+    lr_decay = 1.003,
     param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0",

     train_fn = train_fn,
     valid_fn = valid_fn,
@@ -251,20 +269,24 @@ test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/so
 vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text'

 global_conf = {
-    lrate = 1, wcost = 1e-5, momentum = 0,
+    lrate = 0.01, wcost = 1e-5, momentum = 0,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,

     hidden_size = 20,
+    layer_num = 1,
     chunk_size = 2,
     batch_size = 10,
     max_iter = 3,
     param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0",

     train_fn = train_fn,
     valid_fn = valid_fn,
     test_fn = test_fn,
+    lr_decay = 1.003,
+    decay_iter = 10,
     vocab_fn = vocab_fn,
     sche_log_pre = "[SCHEDULER]:",
     log_w_num = 10, --give a message when log_w_num words have been processed
@@ -274,9 +296,13 @@ global_conf = {
 end

-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-ppl_last = 100000
+local lr_half = false --can not be local, to be set by loadstring
+local start_iter = -1
+local ppl_last = 100000
+local commands_str = "train:test"
+local commands = {}
+local test_iter = -1
+
 if (arg[2] ~= nil) then
     printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
     loadstring(arg[2])()
@@ -285,12 +311,12 @@ else
     printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
 end

-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
 global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str)
-global_conf.dropout_rate = 0
+commands = nerv.SUtil.parse_commands_set(commands_str)
 ----------------printing options---------------------------------
 printf("%s printing global_conf...\n", global_conf.sche_log_pre)
 for id, value in pairs(global_conf) do
@@ -301,6 +327,8 @@ printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
 print("lr_half", lr_half)
 print("start_iter", start_iter)
 print("ppl_last", ppl_last)
+print("commds_str", commands_str)
+print("test_iter", test_iter)
 printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
 nerv.LMUtil.wait(2)
 ------------------printing options end------------------------------
@@ -317,77 +345,92 @@ printf("%s building vocab...\n", global_conf.sche_log_pre)
 global_conf.vocab:build_file(global_conf.vocab_fn, false)
 ppl_rec = {}

-if start_iter == -1 then
-    prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
-end
-
-if start_iter == -1 or start_iter == 0 then
-    print("===INITIAL VALIDATION===")
-    local tnn = load_net(global_conf, 0)
-    global_conf.paramRepo = tnn:get_params() --get auto-generted params
-    global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
-    local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    nerv.LMUtil.wait(1)
-    ppl_rec[0] = {}
-    ppl_rec[0].valid = result:ppl_all("rnn")
-    ppl_last = ppl_rec[0].valid
-    ppl_rec[0].train = 0
-    ppl_rec[0].test = 0
-    ppl_rec[0].lr = 0
-
-    start_iter = 1
-
-    print()
-end
-
-local final_iter
-for iter = start_iter, global_conf.max_iter, 1 do
-    final_iter = iter --for final testing
-    global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
-    tnn = load_net(global_conf, iter - 1)
-    printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
-    global_conf.dropout_rate = nerv.SUtil.sche_get(global_conf.dropout_list, iter)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
-    global_conf.dropout_rate = 0
-    ppl_rec[iter] = {}
-    ppl_rec[iter].train = result:ppl_all("rnn")
-    --shuffling training file
-    printf("%s shuffling training file\n", global_conf.sche_log_pre)
-    os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
-    os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
-    printf("===PEEK ON TEST %d===\n", iter)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-    ppl_rec[iter].test = result:ppl_all("rnn")
-    printf("===VALIDATION %d===\n", iter)
-    result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    ppl_rec[iter].valid = result:ppl_all("rnn")
-    ppl_rec[iter].lr = global_conf.lrate
-    if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then
-        global_conf.lrate = (global_conf.lrate * 0.6)
-    end
-    if ppl_rec[iter].valid < ppl_last then
-        printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
-        global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
-    else
-        printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
-        os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
+local final_iter = -1
+if commands["train"] == 1 then
+    if start_iter == -1 then
+        prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file
+    end
+
+    if start_iter == -1 or start_iter == 0 then
+        print("===INITIAL VALIDATION===")
+        local tnn = load_net(global_conf, 0)
+        global_conf.paramRepo = tnn:get_params() --get auto-generted params
+        global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
+        global_conf.dropout_rate = 0
+        local result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+        nerv.LMUtil.wait(1)
+        ppl_rec[0] = {}
+        ppl_rec[0].valid = result:ppl_all("rnn")
+        ppl_last = ppl_rec[0].valid
+        ppl_rec[0].train = 0
+        ppl_rec[0].test = 0
+        ppl_rec[0].lr = 0
+
+        start_iter = 1
+
+        print()
+    end
+
+    for iter = start_iter, global_conf.max_iter, 1 do
+        final_iter = iter --for final testing
+        global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:"
+        tnn = load_net(global_conf, iter - 1)
+        printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate)
+        global_conf.dropout_rate = nerv.SUtil.sche_get(global_conf.dropout_list, iter)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+        global_conf.dropout_rate = 0
+        ppl_rec[iter] = {}
+        ppl_rec[iter].train = result:ppl_all("rnn")
+        --shuffling training file
+        printf("%s shuffling training file\n", global_conf.sche_log_pre)
+        os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
+        os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+        printf("===PEEK ON TEST %d===\n", iter)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+        ppl_rec[iter].test = result:ppl_all("rnn")
+        printf("===VALIDATION %d===\n", iter)
+        result = LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false) --false update!
+        ppl_rec[iter].valid = result:ppl_all("rnn")
+        ppl_rec[iter].lr = global_conf.lrate
+        if ((ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true) and iter > global_conf.decay_iter) then
+            global_conf.lrate = (global_conf.lrate * 0.6)
+        end
+        if ppl_rec[iter].valid < ppl_last then
+            printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
+            global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
+        else
+            printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
+            os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
+        end
+        if ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true then
+            lr_half = true
+        end
+        if ppl_rec[iter].valid < ppl_last then
+            ppl_last = ppl_rec[iter].valid
+        end
+        printf("\n")
+        nerv.LMUtil.wait(2)
+    end
+    nerv.info("saving final nn to param.final")
+    os.execute('cp ' .. global_conf.param_fn .. '.' .. tostring(final_iter) .. ' ' .. global_conf.param_fn .. '.final')
+
+    printf("===VALIDATION PPL record===\n")
+    for i, _ in pairs(ppl_rec) do
+        printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
     end
-    if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then
-        lr_half = true
+    printf("\n")
+end --if commands["train"]
+
+if commands["test"] == 1 then
+    printf("===FINAL TEST===\n")
+    global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
+    if final_iter ~= -1 and test_iter == -1 then
+        test_iter = final_iter
     end
-    if ppl_rec[iter].valid < ppl_last then
-        ppl_last = ppl_rec[iter].valid
+    if test_iter == -1 then
+        test_iter = "final"
     end
-    printf("\n")
-    nerv.LMUtil.wait(2)
-end
-printf("===VALIDATION PPL record===\n")
-for i, _ in pairs(ppl_rec) do
-    printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test)
-end
-printf("\n")
-printf("===FINAL TEST===\n")
-global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:"
-tnn = load_net(global_conf, final_iter)
-LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-
+    tnn = load_net(global_conf, test_iter)
+    global_conf.dropout_rate = 0
+    LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+end --if commands["test"]
diff --git a/nerv/examples/lmptb/m-tests/sutil_test.lua b/nerv/examples/lmptb/m-tests/sutil_test.lua
index 3f9bf9e..058de7e 100644
--- a/nerv/examples/lmptb/m-tests/sutil_test.lua
+++ b/nerv/examples/lmptb/m-tests/sutil_test.lua
@@ -1,4 +1,10 @@
 --require "tnn.init"
-ss = "0.1*1:2"
+local ss = "0.1*1:2"
 nerv.SUtil.parse_schedule(ss)

+ss = "train:test"
+local coms = nerv.SUtil.parse_commands_set(ss)
+print("!!!")
+for p, v in pairs(coms) do
+    print(p,v)
+end
diff --git a/nerv/tnn/sutil.lua b/nerv/tnn/sutil.lua
index f5bc408..d88bd8e 100644
--- a/nerv/tnn/sutil.lua
+++ b/nerv/tnn/sutil.lua
@@ -50,3 +50,15 @@ function Util.sche_get(s, it)
         return s[#s]
     end
 end
+
+function Util.parse_commands_set(str)
+    local coms = {}
+    local s = Util.simple_split(str, ':,')
+    for i = 1 ,#s do
+        if coms[s[i]] == 1 then
+            nerv.warning("nerv.SUtil.parse_commands_set command(%s) appered more than once in command_set(%s)", s[i], str)
+        end
+        coms[s[i]] = 1
+    end
+    return coms
+end
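
The substantive change to lstmlm_ptb_main.lua is that network depth is now driven by global_conf.layer_num: prepare_layers() adds an lstmL/dropoutL/combinerL trio for every layer beyond the first, and prepare_tnn() wires layer l-1's combiner output into layer l's LSTM together with the delayed self-connections that carry the recurrent state. The sketch below is standalone and not part of the patch; it simply replays the connection loop for layer_num = 2 and prints the links it generates. The port meanings in the comments are my reading of the patch, not documented NERV behaviour.

-- Illustrative sketch: enumerate the connections the layer_num loop would add
-- for a hypothetical 2-layer stack. Assumed port convention (inferred from the patch):
--   lstmL<l> input [1] = activation from the layer below,
--   lstmL<l> inputs [2]/[3] = previous hidden / cell state (delay 1),
--   combinerL<l> output [2] = copy of the layer's output fed to the layer above.
local layer_num = 2
local connections_t = {}
for l = 2, layer_num do
    table.insert(connections_t, {"combinerL"..(l-1).."[2]", "lstmL"..l.."[1]", 0}) -- lower layer -> this layer
    table.insert(connections_t, {"lstmL"..l.."[2]", "lstmL"..l.."[3]", 1})         -- cell state recurrence
    table.insert(connections_t, {"lstmL"..l.."[1]", "dropoutL"..l.."[1]", 0})      -- layer output through dropout
    table.insert(connections_t, {"dropoutL"..l.."[1]", "combinerL"..l.."[1]", 0})
    table.insert(connections_t, {"combinerL"..l.."[1]", "lstmL"..l.."[2]", 1})     -- hidden state recurrence
end
for _, c in ipairs(connections_t) do
    print(string.format("%s -> %s (delay %d)", c[1], c[2], c[3]))
end

For layer_num = 1 both loops are empty, so the single-layer behaviour of the old script is preserved; the only rewiring is that outputL now reads from combinerL<layer_num>[2] instead of combinerL1[2].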
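The rest of the patch adds a small command-set mechanism: the new nerv.SUtil.parse_commands_set (nerv/tnn/sutil.lua) splits a string such as "train:test" on ':' or ',' into a set-like table, and the main script now runs its training loop and its final test only when the corresponding key is present. The learning-rate cut is also parameterised: the hard-coded 1.0003 threshold becomes global_conf.lr_decay (1.003 in the new configs). A minimal usage sketch follows; it assumes nerv with its SUtil module is loaded as in the scripts above, and the PPL and learning-rate numbers are made-up illustrative values.

-- Command-set parsing: "train:test" -> {train = 1, test = 1}
local commands = nerv.SUtil.parse_commands_set("train:test")
if commands["train"] == 1 then print("training phase enabled") end
if commands["test"] == 1 then print("final test phase enabled") end

-- Simplified form of the new LR rule (ignoring lr_half and decay_iter):
-- cut the rate to 60% once validation PPL improves by less than ~0.3%.
local lr_decay, lrate = 1.003, 0.15
local ppl_last, ppl_valid = 120.0, 119.8   -- made-up values for illustration
if ppl_last / ppl_valid < lr_decay then
    lrate = lrate * 0.6                    -- 120.0 / 119.8 ~ 1.0017 < 1.003, so lrate becomes 0.09
end
print(lrate)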