Diffstat (limited to 'nerv/examples/lmptb/main.lua')
-rw-r--r--    nerv/examples/lmptb/main.lua    375
1 file changed, 375 insertions, 0 deletions
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/main.lua
new file mode 100644
index 0000000..8764998
--- /dev/null
+++ b/nerv/examples/lmptb/main.lua
@@ -0,0 +1,375 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+nerv.include('lmptb/layer/init.lua')
+
+--[[global function rename]]--
+printf = nerv.printf
+--[[global function rename ends]]--
+
+--global_conf: table
+--first_time: bool
+--Returns: a ParamRepo
+function prepare_parameters(global_conf, first_time)
+    printf("%s preparing parameters...\n", global_conf.sche_log_pre)
+
+    if (first_time) then
+        ltp_ih = nerv.LinearTransParam("ltp_ih", global_conf)
+        ltp_ih.trans = global_conf.cumat_type(global_conf.vocab:size(), global_conf.hidden_size)
+        ltp_ih.trans:generate(global_conf.param_random)
+
+        ltp_hh = nerv.LinearTransParam("ltp_hh", global_conf)
+        ltp_hh.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.hidden_size)
+        ltp_hh.trans:generate(global_conf.param_random)
+
+        ltp_ho = nerv.LinearTransParam("ltp_ho", global_conf)
+        ltp_ho.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.vocab:size())
+        ltp_ho.trans:generate(global_conf.param_random)
+
+        bp_h = nerv.BiasParam("bp_h", global_conf)
+        bp_h.trans = global_conf.cumat_type(1, global_conf.hidden_size)
+        bp_h.trans:generate(global_conf.param_random)
+
+        bp_o = nerv.BiasParam("bp_o", global_conf)
+        bp_o.trans = global_conf.cumat_type(1, global_conf.vocab:size())
+        bp_o.trans:generate(global_conf.param_random)
+
+        local f = nerv.ChunkFile(global_conf.param_fn, 'w')
+        f:write_chunk(ltp_ih)
+        f:write_chunk(ltp_hh)
+        f:write_chunk(ltp_ho)
+        f:write_chunk(bp_h)
+        f:write_chunk(bp_o)
+        f:close()
+    end
+
+    local paramRepo = nerv.ParamRepo()
+    paramRepo:import({global_conf.param_fn}, nil, global_conf)
+
+    printf("%s preparing parameters end.\n", global_conf.sche_log_pre)
+
+    return paramRepo
+end
+
+--global_conf: table
+--Returns: nerv.LayerRepo
+function prepare_layers(global_conf, paramRepo)
+    printf("%s preparing layers...\n", global_conf.sche_log_pre)
+    local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
+    local layers = {
+        ["nerv.IndRecurrentLayer"] = {
+            ["recurrentL1"] = recurrentLconfig,
+        },
+
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}},
+        },
+
+        ["nerv.SigmoidLayer"] = {
+            ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        },
+
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{["ltp"] = "ltp_ho", ["bp"] = "bp_o"}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}}},
+        },
+
+        ["nerv.SoftmaxCELayer"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
+        },
+    }
+
+    printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
+    for i = 1, global_conf.bptt do
+        layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
+        layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}
+    end
+    local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
+    printf("%s preparing layers end.\n", global_conf.sche_log_pre)
+    return layerRepo
+end
+
+--global_conf: table
+--layerRepo: nerv.LayerRepo
+--Returns: a nerv.DAGLayer
+function prepare_dagLayer(global_conf, layerRepo)
+    printf("%s Initializing daglayer...\n", global_conf.sche_log_pre)
+
+    --input: input_w, input_w, ... input_w_now, last_activation
+    local dim_in_t = {}
+    for i = 1, global_conf.bptt + 1 do dim_in_t[i] = 1 end
+    dim_in_t[global_conf.bptt + 2] = global_conf.hidden_size
+    dim_in_t[global_conf.bptt + 3] = global_conf.vocab:size()
+    --[[ softmax
+            |          \
+         output        i(bptt+3)
+            |
+         recurrentL(bptt+1) ... recurrentL2 - recurrentL1
+         selectL(bptt+1)        selectL2      selectL1
+          /         |              |             |
+     i(bptt+2)  i(bptt+1)          i2            i1
+    ]]--
+    local connections_t = {
+        ["selectL1[1]"] = "recurrentL1[1]",
+        ["recurrentL1[1]"] = "sigmoidL1[1]",
+        ["sigmoidL1[1]"] = "outputL[1]",
+        ["outputL[1]"] = "softmaxL[1]",
+        ["softmaxL[1]"] = "<output>[1]"
+    }
+    for i = 1, global_conf.bptt, 1 do
+        connections_t["<input>["..i.."]"] = "selectL"..i.."[1]"
+        connections_t["selectL"..(i+1).."[1]"] = "recurrentL"..(i+1).."[1]"
+        connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]"
+        connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]"
+    end
+    connections_t["<input>["..(global_conf.bptt+1).."]"] = "selectL"..(global_conf.bptt+1).."[1]"
+    connections_t["<input>["..(global_conf.bptt+2).."]"] = "recurrentL"..(global_conf.bptt+1).."[2]"
+    connections_t["<input>["..(global_conf.bptt+3).."]"] = "softmaxL[2]"
+    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
+    for key, value in pairs(connections_t) do
+        printf("\t%s->%s\n", key, value)
+    end
+
+    local dagL = nerv.DAGLayer("dagL", global_conf, {["dim_in"] = dim_in_t, ["dim_out"] = {1}, ["sub_layers"] = layerRepo,
+        ["connections"] = connections_t,
+    })
+    dagL:init(global_conf.batch_size)
+    printf("%s Initializing DAGLayer end.\n", global_conf.sche_log_pre)
+    return dagL
+end
+
+--global_conf: table
+--dagL: nerv.DAGLayer
+--fn: string
+--config: table
+--Returns: result (nerv.LMResult)
+function propagateFile(global_conf, dagL, fn, config)
+    printf("%s Beginning processing %s...\n", global_conf.sche_log_pre, fn)
+    if (config.do_train == true) then printf("%s do_train in config is true.\n", global_conf.sche_log_pre) end
+    local feeder = nerv.LMFeeder(global_conf, global_conf.batch_size, global_conf.vocab)
+    feeder:open_file(fn)
+
+    local tnow = 1
+    local token_store = {}
+    local hidden_store = {}
+    local sigmoidL_ref = dagL.layers["sigmoidL1"]
+
+    token_store[tnow] = feeder:get_batch()
+    for i = 1, global_conf.bptt + 1 do
+        hidden_store[tnow - i] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+        hidden_store[tnow - i]:fill(0)
+        token_store[tnow - i] = {}
+        for j = 1, global_conf.batch_size do token_store[tnow - i][j] = global_conf.vocab.null_token end
+    end
+
+    local dagL_input = {}
+    for i = 1, global_conf.bptt + 1 do
+        dagL_input[i] = nerv.MMatrixInt(global_conf.batch_size, 1)
+    end
+    dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
+
+    local dagL_output = {global_conf.cumat_type(global_conf.batch_size, 1)}
+    local dagL_err = {nil} --{global_conf.cumat_type(global_conf.batch_size, 1)}
+    local dagL_input_err = {}
+    for i = 1, global_conf.bptt + 1 do
+        dagL_input_err[i] = nil --global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
+    end
+    dagL_input_err[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    dagL_input_err[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
+
+    local result = nerv.LMResult(global_conf, global_conf.vocab)
+    result:init("rnn")
+
+    global_conf.input_word_id = {}
+    while (1) do
+        token_store[tnow + 1] = feeder:get_batch() --the next word (to predict)
+        if (token_store[tnow + 1] == nil) then break end
+
+        --dagL:propagate(dagL_input, dagL_output)
+        for i = 1, global_conf.bptt + 1 do
+            nerv.LMUtil.set_id(dagL_input[i], token_store[tnow - i + 1], global_conf.vocab)
+            global_conf.input_word_id["recurrentL"..i] = dagL_input[i] --for IndRecurrent
+        end
+        dagL_input[global_conf.bptt + 2]:copy_fromd(hidden_store[tnow - global_conf.bptt - 1])
+        nerv.LMUtil.set_onehot(dagL_input[global_conf.bptt + 3], token_store[tnow + 1], global_conf.vocab)
+
+        --local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow)
+        global_conf.timer:tic("dagL-propagate")
+        dagL:propagate(dagL_input, dagL_output)
+        global_conf.timer:toc("dagL-propagate")
+
+        hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+        hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
+
+        if (config.do_train == true) then
+            global_conf.timer:tic("dagL-back_propagate")
+            dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output)
+            global_conf.timer:toc("dagL-back_propagate")
+            global_conf.timer:tic("dagL-update")
+            dagL:update(dagL_err, dagL_input, dagL_output)
+            global_conf.timer:toc("dagL-update")
+        end
+
+        for i = 1, global_conf.batch_size, 1 do
+            if (token_store[tnow + 1][i] ~= global_conf.vocab.null_token) then
+                result:add("rnn", token_store[tnow + 1][i], math.exp(dagL_output[1][i - 1][0]))
+                if (config.report_word == true) then
+                    printf("%s %s: <stream %d> <prob %f>\n", global_conf.sche_log_pre, token_store[tnow + 1][i], i, math.exp(dagL_output[1][i - 1][0]))
+                end
+            end
+            if (result["rnn"].cn_w % global_conf.log_w_num == 0) then
+                printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+                for key, value in pairs(global_conf.timer.rec) do
+                    printf("\t[global_conf.timer]: time spent on %s: %.5fs\n", key, value)
+                end
+                global_conf.timer:flush()
+                --nerv.CuMatrix.print_profile()
+                --nerv.CuMatrix.clear_profile()
+            end
+        end
+
+        token_store[tnow - 2 - global_conf.bptt] = nil
+        hidden_store[tnow - 2 - global_conf.bptt] = nil
+        collectgarbage("collect")
+
+        tnow = tnow + 1
+    end
+
+    printf("%s Displaying result:\n", global_conf.sche_log_pre)
+    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+    printf("%s Finished processing %s.\n", global_conf.sche_log_pre, fn)
+
+    return result
+end
+
+--Returns: dagL, paramRepo
+function load_net(global_conf)
+    local paramRepo = prepare_parameters(global_conf, false)
+    local layerRepo = prepare_layers(global_conf, paramRepo)
+    local dagL = prepare_dagLayer(global_conf, layerRepo)
+    return dagL, paramRepo
+end
+
+--[[global settings]]--
+local set = "ptb"
+
+if (set == "ptb") then
+    train_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt"
+    valid_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt"
+    test_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt"
+    work_dir_base = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work"
+    global_conf = {
+        lrate = 1, wcost = 1e-6, momentum = 0,
+        cumat_type = nerv.CuMatrixFloat,
+        mmat_type = nerv.CuMatrixFloat,
+
+        hidden_size = 200,
+        batch_size = 10,
+        bptt = 6, --number of BPTT steps to unroll; can be set to zero
+        max_iter = 18,
+        param_random = function() return (math.random() / 5 - 0.1) end,
+        independent = true,
+
+        train_fn = train_fn,
+        valid_fn = valid_fn,
+        test_fn = test_fn,
+        sche_log_pre = "[SCHEDULER]:",
+        log_w_num = 50000, --print a message every log_w_num processed words
+        timer = nerv.Timer()
+    }
+    global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate..os.date("_%bD%dH%H")
+    global_conf.param_fn = global_conf.work_dir.."/params"
+elseif (set == "test") then
+    train_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    valid_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    test_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    work_dir = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work-play"
+    global_conf = {
+        lrate = 0.1, wcost = 1e-6, momentum = 0,
+        cumat_type = nerv.CuMatrixFloat,
+        mmat_type = nerv.CuMatrixFloat,
+
+        hidden_size = 5,
+        batch_size = 1,
+        bptt = 0, --number of BPTT steps to unroll; can be set to zero
+        max_iter = 15,
+        param_random = function() return (math.random() / 5 - 0.1) end,
+        independent = true,
+
+        train_fn = train_fn,
+        valid_fn = valid_fn,
+        test_fn = test_fn,
+        work_dir = work_dir,
+        param_fn = work_dir .. "/params",
+
+        sche_log_pre = "[SCHEDULER]:",
+        log_w_num = 80000, --print a message every log_w_num processed words
+        timer = nerv.Timer()
+    }
+end
+
+local vocab = nerv.LMVocab()
+global_conf["vocab"] = vocab
+
+printf("%s printing global_conf...\n", global_conf.sche_log_pre)
+for key, value in pairs(global_conf) do
+    printf("\t%s=%s\n", key, value)
+end
+printf("%s wait 3 seconds...\n", global_conf.sche_log_pre)
+nerv.LMUtil.wait(3)
+printf("%s creating work_dir...\n", global_conf.sche_log_pre)
+os.execute("mkdir -p "..global_conf.work_dir)
+
+scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \
+    dagL, paramRepo = load_net(global_conf) \
+    local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
+    ppl_rec = {} \
+    lr_rec = {} \
+    ppl_rec[0] = result:ppl_net(\"rnn\") ppl_last = ppl_rec[0] \
+    lr_rec[0] = 0 \
+    printf(\"\\n\") \
+    local lr_half = false \
+    for iter = 1, global_conf.max_iter, 1 do \
+        printf(\"===ITERATION %d LR %f===\\n\", iter, global_conf.lrate) \
+        global_conf.sche_log_pre = \"[SCHEDULER ITER\"..iter..\" LR\"..global_conf.lrate..\"]:\" \
+        dagL, paramRepo = load_net(global_conf) \
+        propagateFile(global_conf, dagL, global_conf.train_fn, {do_train = true, report_word = false}) \
+        printf(\"===VALIDATION %d===\\n\", iter) \
+        local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
+        ppl_rec[iter] = result:ppl_net(\"rnn\") \
+        lr_rec[iter] = global_conf.lrate \
+        if (ppl_last / ppl_rec[iter] < 1.03 or lr_half == true) then \
+            global_conf.lrate = (global_conf.lrate / 2) \
+            lr_half = true \
+        end \
+        if (ppl_rec[iter] < ppl_last) then \
+            printf(\"%s saving net to file %s...\\n\", global_conf.sche_log_pre, global_conf.param_fn) \
+            paramRepo:export(global_conf.param_fn, nil) \
+            ppl_last = ppl_rec[iter] \
+        else \
+            printf(\"%s PPL did not improve, rejected...\\n\", global_conf.sche_log_pre) \
+        end \
+        printf(\"\\n\") \
+        nerv.LMUtil.wait(2) \
+    end \
+    printf(\"===VALIDATION PPL record===\\n\") \
+    for i = 0, #ppl_rec do printf(\"<ITER%d LR%.5f: %.3f> \", i, lr_rec[i], ppl_rec[i]) end \
+    printf(\"\\n\") \
+    printf(\"===FINAL TEST===\\n\") \
+    global_conf.sche_log_pre = \"[SCHEDULER FINAL_TEST]:\" \
+    dagL, _ = load_net(global_conf) \
+    propagateFile(global_conf, dagL, global_conf.test_fn, {do_train = false, report_word = false})"
+printf("%s printing schedule:\n", global_conf.sche_log_pre)
+printf("%s\n", scheduler)
+printf("%s wait 3 seconds...\n", global_conf.sche_log_pre)
+nerv.LMUtil.wait(3)
+--[[global settings end]]--
+
+global_conf.vocab:build_file(global_conf.train_fn)
+
+prepare_parameters(global_conf, true)
+
+assert(loadstring(scheduler))()