path: root/nerv/examples/lmptb/main.lua
diff options
Diffstat (limited to 'nerv/examples/lmptb/main.lua')
1 files changed, 375 insertions, 0 deletions
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/main.lua
new file mode 100644
index 0000000..8764998
--- /dev/null
+++ b/nerv/examples/lmptb/main.lua
@@ -0,0 +1,375 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+--[[global function rename: expose nerv.printf under the short global name printf]]--
+printf = nerv.printf -- deliberately global: the functions in this file call printf unqualified
+--[[global function rename ends]]--
+--Create (on the first run) and load the network parameters.
+--When first_time is true, five parameters are filled through
+--global_conf.param_random and written, in this order, to a nerv.ChunkFile
+--opened on global_conf.param_fn in 'w' mode:
+--  ltp_ih (vocab x hidden), ltp_hh (hidden x hidden), ltp_ho (hidden x vocab),
+--  bp_h (1 x hidden), bp_o (1 x vocab).
+--In every case global_conf.param_fn is then imported into a fresh ParamRepo.
+--global_conf: table
+--first_time: bool
+--Returns: a ParamRepo
+function prepare_parameters(global_conf, first_time)
+    printf("%s preparing parameters...\n", global_conf.sche_log_pre)
+    if (first_time) then
+        local hidden_size = global_conf.hidden_size
+        local vocab_size = global_conf.vocab:size()
+
+        --Builds one parameter of the given class and fills its matrix.
+        --Kept local so the parameter objects no longer leak into the
+        --global environment.
+        local function new_param(param_class, id, rows, cols)
+            local param = param_class(id, global_conf)
+            param.trans = global_conf.cumat_type(rows, cols)
+            param.trans:generate(global_conf.param_random)
+            return param
+        end
+
+        --Creation order is preserved because every generate() call draws
+        --from global_conf.param_random.
+        local params = {
+            new_param(nerv.LinearTransParam, "ltp_ih", vocab_size, hidden_size),
+            new_param(nerv.LinearTransParam, "ltp_hh", hidden_size, hidden_size),
+            new_param(nerv.LinearTransParam, "ltp_ho", hidden_size, vocab_size),
+            new_param(nerv.BiasParam, "bp_h", 1, hidden_size),
+            new_param(nerv.BiasParam, "bp_o", 1, vocab_size),
+        }
+
+        local f = nerv.ChunkFile(global_conf.param_fn, 'w')
+        for i = 1, #params do
+            f:write_chunk(params[i])
+        end
+        f:close()
+    end
+    local paramRepo = nerv.ParamRepo()
+    paramRepo:import({global_conf.param_fn}, nil, global_conf)
+    printf("%s preparing parameters end.\n", global_conf.sche_log_pre)
+    return paramRepo
+end
+--Describe every layer of the unrolled network and build the layer repository.
+--Declares global_conf.bptt + 1 copies of the selectL / recurrentL / sigmoidL
+--triple (numbered from 1), plus one nerv.AffineLayer "outputL" and one
+--nerv.SoftmaxCELayer "softmaxL". All copies refer to the same parameter ids
+--(ltp_ih, ltp_hh, bp_h), so the parameters are named once and reused.
+--global_conf: table
+--paramRepo: the ParamRepo the layers take their parameters from
+--Returns: nerv.LayerRepo
+function prepare_layers(global_conf, paramRepo)
+    printf("%s preparing layers...\n", global_conf.sche_log_pre)
+    local hidden_size = global_conf.hidden_size
+    local vocab_size = global_conf.vocab:size()
+
+    --One config table shared (same table object) by every recurrent layer.
+    --Second input of a recurrent layer is the previous hidden activation.
+    local recurrentLconfig = {
+        {["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"},
+        {
+            ["dim_in"] = {hidden_size, hidden_size},
+            ["dim_out"] = {hidden_size},
+            ["break_id"] = global_conf.vocab:get_sen_entry().id,
+            ["independent"] = global_conf.independent,
+            ["clip"] = 10,
+        },
+    }
+
+    --Each sigmoid/select layer gets its own fresh config table.
+    local function sigmoid_config()
+        return {{}, {["dim_in"] = {hidden_size}, ["dim_out"] = {hidden_size}}}
+    end
+    local function select_config()
+        return {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {hidden_size}}}
+    end
+
+    local layers = {
+        ["nerv.IndRecurrentLayer"] = {
+            ["recurrentL1"] = recurrentLconfig,
+        },
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = select_config(),
+        },
+        ["nerv.SigmoidLayer"] = {
+            ["sigmoidL1"] = sigmoid_config(),
+        },
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{["ltp"] = "ltp_ho", ["bp"] = "bp_o"}, {["dim_in"] = {hidden_size}, ["dim_out"] = {vocab_size}}},
+        },
+        ["nerv.SoftmaxCELayer"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {vocab_size, vocab_size}, ["dim_out"] = {1}}},
+        },
+    }
+
+    printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
+    --Layers 2 .. bptt+1 are the extra unrolled time steps.
+    for i = 1, global_conf.bptt do
+        local step = i + 1
+        layers["nerv.IndRecurrentLayer"]["recurrentL" .. step] = recurrentLconfig
+        layers["nerv.SigmoidLayer"]["sigmoidL" .. step] = sigmoid_config()
+        layers["nerv.SelectLinearLayer"]["selectL" .. step] = select_config()
+    end
+
+    local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
+    printf("%s preparing layers end.\n", global_conf.sche_log_pre)
+    return layerRepo
+end
+--Wire the layers of layerRepo into one nerv.DAGLayer and initialise it.
+--
+--DAG inputs (n = global_conf.bptt):
+--  <input>[1] .. <input>[n+1]  width 1, fed to selectL1 .. selectL(n+1)
+--  <input>[n+2]                width hidden_size, fed to recurrentL(n+1)[2]
+--  <input>[n+3]                width vocab size, fed to softmaxL[2]
+--DAG output: <output>[1], width 1, taken from softmaxL[1].
+--
+--Topology:
+--   <output>[1]
+--       |
+--    softmaxL ---- <input>[n+3]
+--       |
+--    outputL
+--       |
+--   sigmoidL1
+--       |
+--  recurrentL1 <- sigmoidL2 <- recurrentL2 <- ... <- sigmoidL(n+1) <- recurrentL(n+1) <- <input>[n+2]
+--       |                          |                                       |
+--   selectL1                   selectL2                              selectL(n+1)
+--       |                          |                                       |
+--   <input>[1]                 <input>[2]                            <input>[n+1]
+--
+--global_conf: table
+--layerRepo: nerv.LayerRepo
+--Returns: a nerv.DAGLayer
+function prepare_dagLayer(global_conf, layerRepo)
+    printf("%s Initing daglayer ...\n", global_conf.sche_log_pre)
+    local bptt = global_conf.bptt
+
+    --input: input_w, input_w, ... input_w_now, last_activation
+    local dim_in_t = {}
+    for i = 1, bptt + 1 do dim_in_t[i] = 1 end
+    dim_in_t[bptt + 2] = global_conf.hidden_size
+    dim_in_t[bptt + 3] = global_conf.vocab:size()
+
+    --Fixed part: the first time step up to the output.
+    local connections_t = {
+        ["selectL1[1]"] = "recurrentL1[1]",
+        ["recurrentL1[1]"] = "sigmoidL1[1]",
+        ["sigmoidL1[1]"] = "outputL[1]",
+        ["outputL[1]"] = "softmaxL[1]",
+        ["softmaxL[1]"] = "<output>[1]"
+    }
+    --Unrolled part: step i+1 feeds its activation into port 2 of step i.
+    for i = 1, bptt, 1 do
+        connections_t["<input>["..i.."]"] = "selectL"..i.."[1]"
+        connections_t["selectL"..(i+1).."[1]"] = "recurrentL"..(i+1).."[1]"
+        connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]"
+        connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]"
+    end
+    connections_t["<input>["..(bptt+1).."]"] = "selectL"..(bptt+1).."[1]"
+    connections_t["<input>["..(bptt+2).."]"] = "recurrentL"..(bptt+1).."[2]"
+    connections_t["<input>["..(bptt+3).."]"] = "softmaxL[2]"
+
+    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
+    --pairs() visits keys in unspecified order; sort so the log is stable
+    --from run to run.
+    local sources = {}
+    for key in pairs(connections_t) do
+        sources[#sources + 1] = key
+    end
+    table.sort(sources)
+    for _, key in ipairs(sources) do
+        printf("\t%s->%s\n", key, connections_t[key])
+    end
+
+    local dagL = nerv.DAGLayer("dagL", global_conf, {["dim_in"] = dim_in_t, ["dim_out"] = {1}, ["sub_layers"] = layerRepo,
+        ["connections"] = connections_t,
+    })
+    dagL:init(global_conf.batch_size)
+    printf("%s Initing DAGLayer end.\n", global_conf.sche_log_pre)
+    return dagL
+end
+--Run the unrolled network over every batch read from a text file and
+--accumulate per-word probabilities; optionally train while doing so.
+--
+--For each time step the last bptt+1 token batches are fed as word inputs,
+--the hidden activation stored bptt+1 steps earlier is fed as the initial
+--recurrent state, and the following token batch is fed as the one-hot target.
+--math.exp of the DAG output is recorded per non-null word in an
+--nerv.LMResult under the key "rnn". When config.do_train is true,
+--back_propagate and update are also called on the DAG for every step.
+--Side effect: global_conf.input_word_id is replaced by a fresh table.
+--
+--global_conf: table
+--dagL: nerv.DAGLayer
+--fn: string
+--config: table (fields read here: do_train, report_word)
+--Returns: table, result
+function propagateFile(global_conf, dagL, fn, config)
+    printf("%s Begining doing on %s...\n", global_conf.sche_log_pre, fn)
+    if (config.do_train == true) then printf("%s do_train in config is true.\n", global_conf.sche_log_pre) end
+
+    local bptt = global_conf.bptt
+    local batch_size = global_conf.batch_size
+    local hidden_size = global_conf.hidden_size
+    local vocab = global_conf.vocab
+
+    local feeder = nerv.LMFeeder(global_conf, batch_size, vocab)
+    feeder:open_file(fn)
+
+    --token_store[t] / hidden_store[t] hold the token batch / hidden
+    --activation of time step t; only a sliding window is kept alive.
+    local tnow = 1
+    local token_store = {}
+    local hidden_store = {}
+    local sigmoidL_ref = dagL.layers["sigmoidL1"]
+
+    token_store[tnow] = feeder:get_batch()
+    --Pad the history before the first step (indices 0 .. -bptt) with zero
+    --activations and null tokens.
+    for i = 1, bptt + 1 do
+        hidden_store[tnow - i] = global_conf.cumat_type(batch_size, hidden_size)
+        hidden_store[tnow - i]:fill(0)
+        token_store[tnow - i] = {}
+        for j = 1, batch_size do token_store[tnow - i][j] = vocab.null_token end
+    end
+
+    --Buffers reused for every step.
+    local dagL_input = {}
+    for i = 1, bptt + 1 do
+        dagL_input[i] = nerv.MMatrixInt(batch_size, 1)
+    end
+    dagL_input[bptt + 2] = global_conf.cumat_type(batch_size, hidden_size)
+    dagL_input[bptt + 3] = global_conf.cumat_type(batch_size, vocab:size())
+
+    local dagL_output = {global_conf.cumat_type(batch_size, 1)}
+    local dagL_err = {} --no error is fed in from above the DAG output
+    --Slots 1 .. bptt+1 (the word-id inputs) are intentionally left nil.
+    local dagL_input_err = {}
+    dagL_input_err[bptt + 2] = global_conf.cumat_type(batch_size, hidden_size)
+    dagL_input_err[bptt + 3] = global_conf.cumat_type(batch_size, vocab:size())
+
+    local result = nerv.LMResult(global_conf, vocab)
+    result:init("rnn")
+
+    global_conf.input_word_id = {}
+    while true do
+        token_store[tnow + 1] = feeder:get_batch() --The next word(to predict)
+        if (token_store[tnow + 1] == nil) then break end
+        local next_tokens = token_store[tnow + 1]
+
+        --Input i carries the tokens of step tnow - i + 1 (1 = current step).
+        for i = 1, bptt + 1 do
+            nerv.LMUtil.set_id(dagL_input[i], token_store[tnow - i + 1], vocab)
+            global_conf.input_word_id["recurrentL"..i] = dagL_input[i] --for IndRecurrent
+        end
+        dagL_input[bptt + 2]:copy_fromd(hidden_store[tnow - bptt - 1])
+        nerv.LMUtil.set_onehot(dagL_input[bptt + 3], next_tokens, vocab)
+
+        global_conf.timer:tic("dagL-propagate")
+        dagL:propagate(dagL_input, dagL_output)
+        global_conf.timer:toc("dagL-propagate")
+
+        --Keep a private copy of the current hidden activation for later steps.
+        hidden_store[tnow] = global_conf.cumat_type(batch_size, hidden_size)
+        hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
+
+        if (config.do_train == true) then
+            global_conf.timer:tic("dagL-back_propagate")
+            dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output)
+            global_conf.timer:toc("dagL-back_propagate")
+            global_conf.timer:tic("dagL-update")
+            dagL:update(dagL_err, dagL_input, dagL_output)
+            global_conf.timer:toc("dagL-update")
+        end
+
+        for i = 1, batch_size, 1 do
+            local word = next_tokens[i]
+            if (word ~= vocab.null_token) then
+                --The output matrix is addressed as [i - 1][0] for stream i.
+                local prob = math.exp(dagL_output[1][i - 1][0])
+                result:add("rnn", word, prob)
+                if (config.report_word == true) then
+                    printf("%s %s: <stream %d> <prob %f>\n", global_conf.sche_log_pre, word, i, prob)
+                end
+                --Progress is checked only right after a word was recorded.
+                --Checking on null (padding) slots as well would repeat the
+                --same report, and flush the timer, while the word count is
+                --unchanged (including a spurious report at 0 words).
+                --Assumes cn_w only changes through result:add.
+                if (result["rnn"].cn_w % global_conf.log_w_num == 0) then
+                    printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+                    for key, value in pairs(global_conf.timer.rec) do
+                        printf("\t [global_conf.timer]: time spent on %s:%.5fs\n", key, value)
+                    end
+                    global_conf.timer:flush()
+                    --nerv.CuMatrix.print_profile()
+                    --nerv.CuMatrix.clear_profile()
+                end
+            end
+        end
+
+        --Drop the history entry that has just left the bptt window.
+        token_store[tnow - 2 - bptt] = nil
+        hidden_store[tnow - 2 - bptt] = nil
+        collectgarbage("collect")
+
+        tnow = tnow + 1
+    end
+
+    printf("%s Displaying result:\n", global_conf.sche_log_pre)
+    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+    printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
+    return result
+end
+--Rebuild the whole network from the parameters stored in global_conf.param_fn.
+--prepare_parameters is called with first_time = false, so no new random
+--parameters are generated; the file is only imported.
+--global_conf: table
+--returns dagL, paramRepo
+function load_net(global_conf)
+    local paramRepo = prepare_parameters(global_conf, false)
+    local layerRepo = prepare_layers(global_conf, paramRepo)
+    local dagL = prepare_dagLayer(global_conf, layerRepo)
+    return dagL, paramRepo
+end
+--[[global settings]]--
+--Selects one of two hard-coded set-ups ("ptb" or "test"), builds the global
+--table global_conf from it, attaches a vocabulary, prints the configuration
+--and creates the working directory. global_conf is deliberately global.
+local set = "ptb"
+
+if (set == "ptb") then
+    train_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt"
+    valid_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt"
+    test_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt"
+    work_dir_base = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work"
+    global_conf = {
+        lrate = 1, wcost = 1e-6, momentum = 0,
+        cumat_type = nerv.CuMatrixFloat,
+        --NOTE(review): mmat_type is set to a Cu matrix type, same as cumat_type; confirm this is intended
+        mmat_type = nerv.CuMatrixFloat,
+
+        hidden_size = 200,
+        batch_size = 10,
+        bptt = 6, --train bptt_block's words. could be set to zero
+        max_iter = 18,
+        --uniform in [-0.1, 0.1)
+        param_random = function() return (math.random() / 5 - 0.1) end,
+        independent = true,
+
+        train_fn = train_fn,
+        valid_fn = valid_fn,
+        test_fn = test_fn,
+        sche_log_pre = "[SCHEDULER]:",
+        log_w_num = 50000, --give a message when log_w_num words have been processed
+        timer = nerv.Timer()
+    }
+    --The work dir name encodes hidden size, bptt, learning rate and a month/day/hour stamp.
+    global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate..os.date("_%bD%dH%H")
+    global_conf.param_fn = global_conf.work_dir.."/params"
+elseif (set == "test") then
+    train_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    valid_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    test_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+    work_dir = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work-play"
+    global_conf = {
+        lrate = 0.1, wcost = 1e-6, momentum = 0,
+        cumat_type = nerv.CuMatrixFloat,
+        --NOTE(review): mmat_type is set to a Cu matrix type, same as cumat_type; confirm this is intended
+        mmat_type = nerv.CuMatrixFloat,
+
+        hidden_size = 5,
+        batch_size = 1,
+        bptt = 0, --train bptt_block's words. could be set to zero
+        max_iter = 15,
+        --uniform in [-0.1, 0.1)
+        param_random = function() return (math.random() / 5 - 0.1) end,
+        independent = true,
+
+        train_fn = train_fn,
+        valid_fn = valid_fn,
+        test_fn = test_fn,
+        work_dir = work_dir,
+        param_fn = work_dir .. "/params",
+        sche_log_pre = "[SCHEDULER]:",
+        log_w_num = 80000, --give a message when log_w_num words have been processed
+        timer = nerv.Timer()
+    }
+end
+
+local vocab = nerv.LMVocab()
+global_conf["vocab"] = vocab
+
+printf("%s printing global_conf...\n", global_conf.sche_log_pre)
+for key, value in pairs(global_conf) do
+    --global_conf holds booleans, functions and tables; convert explicitly
+    --because %s in plain string.format (Lua 5.1) only accepts strings and numbers.
+    printf("\t%s=%s\n", tostring(key), tostring(value))
+end
+--NOTE(review): the message below announces a wait, but no wait call follows it here.
+printf("%s wait 3 seconds...\n", global_conf.sche_log_pre)
+
+printf("%s creating work_dir...\n", global_conf.sche_log_pre)
+os.execute("mkdir -p "..global_conf.work_dir)
+scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \
+ dagL, paramRepo = load_net(global_conf) \
+ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
+ ppl_rec = {} \
+ lr_rec = {} \
+ ppl_rec[0] = result:ppl_net(\"rnn\") ppl_last = ppl_rec[0] \
+ lr_rec[0] = 0 \
+ printf(\"\\n\") \
+ local lr_half = false \
+ for iter = 1, global_conf.max_iter, 1 do \
+ printf(\"===ITERATION %d LR %f===\\n\", iter, global_conf.lrate) \
+ global_conf.sche_log_pre = \"[SCHEDULER ITER\"..iter..\" LR\"..global_conf.lrate..\"]:\" \
+ dagL, paramRepo = load_net(global_conf) \
+ propagateFile(global_conf, dagL, global_conf.train_fn, {do_train = true, report_word = false}) \
+ printf(\"===VALIDATION %d===\\n\", iter) \
+ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
+ ppl_rec[iter] = result:ppl_net(\"rnn\") \
+ lr_rec[iter] = global_conf.lrate \
+ if (ppl_last / ppl_rec[iter] < 1.03 or lr_half == true) then \
+ global_conf.lrate = (global_conf.lrate / 2) \
+ lr_half = true \
+ end \
+ if (ppl_rec[iter] < ppl_last) then \
+ printf(\"%s saving net to file %s...\\n\", global_conf.sche_log_pre, global_conf.param_fn) \
+ paramRepo:export(global_conf.param_fn, nil) \
+ ppl_last = ppl_rec[iter] \
+ else \
+ printf(\"%s PPL did not improve, rejected...\\n\", global_conf.sche_log_pre) \
+ end \
+ printf(\"\\n\") \
+ nerv.LMUtil.wait(2) \
+ end \
+ printf(\"===VALIDATION PPL record===\\n\") \
+ for i = 0, #ppl_rec do printf(\"<ITER%d LR%.5f: %.3f> \", i, lr_rec[i], ppl_rec[i]) end \
+ printf(\"\\n\") \
+ printf(\"===FINAL TEST===\\n\") \
+ global_conf.sche_log_pre = \"[SCHEDULER FINAL_TEST]:\" \
+ dagL, _ = load_net(global_conf) \
+ propagateFile(global_conf, dagL, global_conf.test_fn, {do_train = false, report_word = false})"
+printf("%s printing schedule:\n", global_conf.sche_log_pre) -- scheduler (above) is Lua source kept as text: initial validation, up to max_iter train+validate rounds (lrate halved every round once the PPL gain ratio drops below 1.03; params exported only when PPL improves), then a final test. NOTE(review): the code that executes this text is not visible in this part of the file
+printf("%s\n", scheduler) -- echo the schedule text itself so the log records what is going to run
+printf("%s wait 3 seconds...\n", global_conf.sche_log_pre) -- NOTE(review): only the message is printed; no wait call is visible after it
+--[[global settings end]]--
+prepare_parameters(global_conf, true) -- first_time = true: randomly initialise the parameters and write them to global_conf.param_fn