diff options
Diffstat (limited to 'nerv/examples/lmptb/main.lua')
-rw-r--r-- | nerv/examples/lmptb/main.lua | 118 |
1 files changed, 76 insertions, 42 deletions
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/main.lua index e797254..8764998 100644 --- a/nerv/examples/lmptb/main.lua +++ b/nerv/examples/lmptb/main.lua @@ -55,12 +55,16 @@ end --Returns: nerv.LayerRepo function prepare_layers(global_conf, paramRepo) printf("%s preparing layers...\n", global_conf.sche_log_pre) - local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_ih"] = "ltp_ih", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent}} + local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}} local layers = { - ["nerv.LMAffineRecurrentLayer"] = { + ["nerv.IndRecurrentLayer"] = { ["recurrentL1"] = recurrentLconfig, }, + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}, + }, + ["nerv.SigmoidLayer"] = { ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} }, @@ -76,8 +80,9 @@ function prepare_layers(global_conf, paramRepo) printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt) for i = 1, global_conf.bptt do - layers["nerv.LMAffineRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig + layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} + layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}} end local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf) printf("%s preparing layers end.\n", global_conf.sche_log_pre) @@ -92,7 +97,7 @@ function prepare_dagLayer(global_conf, layerRepo) --input: input_w, input_w, ... input_w_now, last_activation local dim_in_t = {} - for i = 1, global_conf.bptt + 1 do dim_in_t[i] = global_conf.vocab:size() end + for i = 1, global_conf.bptt + 1 do dim_in_t[i] = 1 end dim_in_t[global_conf.bptt + 2] = global_conf.hidden_size dim_in_t[global_conf.bptt + 3] = global_conf.vocab:size() --[[ softmax @@ -100,21 +105,24 @@ function prepare_dagLayer(global_conf, layerRepo) ouptut i(bptt+3) | recurrentL(bptt+1)... recurrentL2-recurrentL1 + selectL(bptt+1) selectL2 selectL1 / | | | i(bptt+2) i(bptt+1) i2 i1 ]]-- local connections_t = { + ["selectL1[1]"] = "recurrentL1[1]", ["recurrentL1[1]"] = "sigmoidL1[1]", ["sigmoidL1[1]"] = "outputL[1]", ["outputL[1]"] = "softmaxL[1]", ["softmaxL[1]"] = "<output>[1]" } for i = 1, global_conf.bptt, 1 do - connections_t["<input>["..i.."]"] = "recurrentL"..i.."[1]" + connections_t["<input>["..i.."]"] = "selectL"..i.."[1]" + connections_t["selectL"..(i+1).."[1]"] = "recurrentL"..(i+1).."[1]" connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]" connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]" end - connections_t["<input>["..(global_conf.bptt+1).."]"] = "recurrentL"..(global_conf.bptt+1).."[1]" + connections_t["<input>["..(global_conf.bptt+1).."]"] = "selectL"..(global_conf.bptt+1).."[1]" connections_t["<input>["..(global_conf.bptt+2).."]"] = "recurrentL"..(global_conf.bptt+1).."[2]" connections_t["<input>["..(global_conf.bptt+3).."]"] = "softmaxL[2]" printf("%s printing DAG connections:\n", global_conf.sche_log_pre) @@ -130,18 +138,6 @@ function prepare_dagLayer(global_conf, layerRepo) return dagL end ---Returns: table -function create_dag_input(global_conf, token_store, hidden_store, tnow) - local dagL_input = {} - for i = 1, global_conf.bptt + 1 do - dagL_input[i] = nerv.LMUtil.create_onehot(token_store[tnow - i + 1], global_conf.vocab, global_conf.cumat_type) - end - dagL_input[global_conf.bptt + 2] = hidden_store[tnow - global_conf.bptt - 1] - dagL_input[global_conf.bptt + 3] = nerv.LMUtil.create_onehot(token_store[tnow + 1], global_conf.vocab, global_conf.cumat_type) - - return dagL_input -end - --global_conf: table --dagL: nerv.DAGLayer --fn: string @@ -165,32 +161,54 @@ function propagateFile(global_conf, dagL, fn, config) token_store[tnow - i] = {} for j = 1, global_conf.batch_size do token_store[tnow - i][j] = global_conf.vocab.null_token end end - + + local dagL_input = {} + for i = 1, global_conf.bptt + 1 do + dagL_input[i] = nerv.MMatrixInt(global_conf.batch_size, 1) + end + dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) + dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) + local dagL_output = {global_conf.cumat_type(global_conf.batch_size, 1)} local dagL_err = {nil} --{global_conf.cumat_type(global_conf.batch_size, 1)} local dagL_input_err = {} for i = 1, global_conf.bptt + 1 do - dagL_input_err[i] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) + dagL_input_err[i] = nil --global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) end dagL_input_err[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) dagL_input_err[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) local result = nerv.LMResult(global_conf, global_conf.vocab) result:init("rnn") - + + global_conf.input_word_id = {} while (1) do token_store[tnow + 1] = feeder:get_batch() --The next word(to predict) if (token_store[tnow + 1] == nil) then break end - local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow) --dagL:propagate(dagL_input, dagL_output) + for i = 1, global_conf.bptt + 1 do + nerv.LMUtil.set_id(dagL_input[i], token_store[tnow - i + 1], global_conf.vocab) + global_conf.input_word_id["recurrentL"..i] = dagL_input[i] --for IndRecurrent + end + dagL_input[global_conf.bptt + 2]:copy_fromd(hidden_store[tnow - global_conf.bptt - 1]) + nerv.LMUtil.set_onehot(dagL_input[global_conf.bptt + 3], token_store[tnow + 1], global_conf.vocab) + + --local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow) + global_conf.timer:tic("dagL-propagate") + dagL:propagate(dagL_input, dagL_output) + global_conf.timer:toc("dagL-propagate") hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1]) if (config.do_train == true) then - --dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output) - --dagL:update(dagL_err, dagL_input, dagL_output) + global_conf.timer:tic("dagL-back_propagate") + dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output) + global_conf.timer:toc("dagL-back_propagate") + global_conf.timer:tic("dagL-update") + dagL:update(dagL_err, dagL_input, dagL_output) + global_conf.timer:toc("dagL-update") end for i = 1, global_conf.batch_size, 1 do @@ -201,13 +219,20 @@ function propagateFile(global_conf, dagL, fn, config) end end if (result["rnn"].cn_w % global_conf.log_w_num == 0) then - printf("%s %d words processed.\n", global_conf.sche_log_pre, result["rnn"].cn_w) + printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date()) + for key, value in pairs(global_conf.timer.rec) do + printf("\t [global_conf.timer]: time spent on %s:%.5fs\n", key, value) + end + global_conf.timer:flush() + --nerv.CuMatrix.print_profile() + --nerv.CuMatrix.clear_profile() end end token_store[tnow - 2 - global_conf.bptt] = nil hidden_store[tnow - 2 - global_conf.bptt] = nil - collectgarbage("collect") + collectgarbage("collect") + tnow = tnow + 1 end @@ -230,19 +255,19 @@ end local set = "ptb" if (set == "ptb") then - train_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt" - valid_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.valid.txt" - test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.test.txt" - work_dir_base = "/home/slhome/txh18/workspace/nerv-project/lmptb-work" + train_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt" + valid_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt" + test_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt" + work_dir_base = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work" global_conf = { - lrate = 0.1, wcost = 1e-6, momentum = 0, + lrate = 1, wcost = 1e-6, momentum = 0, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.CuMatrixFloat, - hidden_size = 100, + hidden_size = 200, batch_size = 10, - bptt = 3, --train bptt_block's words. could be set to zero - max_iter = 15, + bptt = 6, --train bptt_block's words. could be set to zero + max_iter = 18, param_random = function() return (math.random() / 5 - 0.1) end, independent = true, @@ -250,15 +275,16 @@ if (set == "ptb") then valid_fn = valid_fn, test_fn = test_fn, sche_log_pre = "[SCHEDULER]:", - log_w_num = 100000, --give a message when log_w_num words have been processed + log_w_num = 50000, --give a message when log_w_num words have been processed + timer = nerv.Timer() } global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate..os.date("_%bD%dH%H") global_conf.param_fn = global_conf.work_dir.."/params" elseif (set == "test") then - train_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - valid_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - test_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - work_dir = "/home/slhome/txh18/workspace/nerv-project/lmptb-work-play" + train_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + valid_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + test_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + work_dir = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work-play" global_conf = { lrate = 0.1, wcost = 1e-6, momentum = 0, cumat_type = nerv.CuMatrixFloat, @@ -266,7 +292,7 @@ elseif (set == "test") then hidden_size = 5, batch_size = 1, - bptt = 1, --train bptt_block's words. could be set to zero + bptt = 0, --train bptt_block's words. could be set to zero max_iter = 15, param_random = function() return (math.random() / 5 - 0.1) end, independent = true, @@ -279,6 +305,7 @@ elseif (set == "test") then sche_log_pre = "[SCHEDULER]:", log_w_num = 80000, --give a message when log_w_num words have been processed + timer = nerv.Timer() } end @@ -298,8 +325,11 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ dagL, paramRepo = load_net(global_conf) \ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \ ppl_rec = {} \ + lr_rec = {} \ ppl_rec[0] = result:ppl_net(\"rnn\") ppl_last = ppl_rec[0] \ + lr_rec[0] = 0 \ printf(\"\\n\") \ + local lr_half = false \ for iter = 1, global_conf.max_iter, 1 do \ printf(\"===ITERATION %d LR %f===\\n\", iter, global_conf.lrate) \ global_conf.sche_log_pre = \"[SCHEDULER ITER\"..iter..\" LR\"..global_conf.lrate..\"]:\" \ @@ -308,8 +338,10 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ printf(\"===VALIDATION %d===\\n\", iter) \ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \ ppl_rec[iter] = result:ppl_net(\"rnn\") \ - if (ppl_last / ppl_rec[iter] < 1.03) then \ + lr_rec[iter] = global_conf.lrate \ + if (ppl_last / ppl_rec[iter] < 1.03 or lr_half == true) then \ global_conf.lrate = (global_conf.lrate / 2) \ + lr_half = true \ end \ if (ppl_rec[iter] < ppl_last) then \ printf(\"%s saving net to file %s...\\n\", global_conf.sche_log_pre, global_conf.param_fn) \ @@ -322,7 +354,7 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ nerv.LMUtil.wait(2) \ end \ printf(\"===VALIDATION PPL record===\\n\") \ - for i = 0, #ppl_rec do printf(\"<%d: %.2f> \", i, ppl_rec[i]) end \ + for i = 0, #ppl_rec do printf(\"<ITER%d LR%.5f: %.3f> \", i, lr_rec[i], ppl_rec[i]) end \ printf(\"\\n\") \ printf(\"===FINAL TEST===\\n\") \ global_conf.sche_log_pre = \"[SCHEDULER FINAL_TEST]:\" \ @@ -339,3 +371,5 @@ global_conf.vocab:build_file(global_conf.train_fn) prepare_parameters(global_conf, true) assert(loadstring(scheduler))() + + |