From 9cb04041b1f1aabfd67480140caa56325b95b9ad Mon Sep 17 00:00:00 2001 From: cloudygoose Date: Tue, 11 Aug 2015 11:48:51 +0800 Subject: Last port is wrong --- nerv/examples/lmptb/lmptb/layer/init.lua | 4 +- .../lmptb/lmptb/layer/lm_affine_recurrent.lua | 6 +- nerv/examples/lmptb/lmptb/lmutil.lua | 65 ++++++++++++ nerv/examples/lmptb/main.lua | 118 +++++++++++++-------- nerv/examples/lmptb/nerv | 2 +- 5 files changed, 147 insertions(+), 48 deletions(-) diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua index b3b00f6..ff29126 100644 --- a/nerv/examples/lmptb/lmptb/layer/init.lua +++ b/nerv/examples/lmptb/lmptb/layer/init.lua @@ -1,5 +1,5 @@ -require 'lmptb.layer.affine_recurrent' +require 'lmptb.layer.select_linear' +--require 'lmptb.layer.affine_recurrent' require 'lmptb.layer.lm_affine_recurrent' - diff --git a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua index f1eb4a1..c43e567 100644 --- a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua +++ b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua @@ -1,4 +1,4 @@ -local LMRecurrent = nerv.class('nerv.LMAffineRecurrentLayer', 'nerv.AffineRecurrentLayer') --breaks at sentence end, when is met, input will be set to zero +local LMRecurrent = nerv.class('nerv.IndRecurrentLayer', 'nerv.AffineRecurrentLayer') --breaks at sentence end, when is met, input will be set to zero --id: string --global_conf: table @@ -11,10 +11,10 @@ function LMRecurrent:__init(id, global_conf, layer_conf) end function LMRecurrent:propagate(input, output) - output[1]:mul(input[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'N') + output[1]:copy_fromd(input[1]) if (self.independent == true) then for i = 1, input[1]:nrow() do - if (input[1][i - 1][self.break_id - 1] > 0.1) then --here is sentence break + if (self.gconf.input_word_id[self.id][i - 1][0] == self.break_id) then --here is sentence break input[2][i - 1]:fill(0) end end diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua index c15c637..73cf041 100644 --- a/nerv/examples/lmptb/lmptb/lmutil.lua +++ b/nerv/examples/lmptb/lmptb/lmutil.lua @@ -17,6 +17,45 @@ function Util.create_onehot(list, vocab, ty) return m end +--m: matrix +--list: table, list of string(word) +--vocab: nerv.LMVocab +--Returns: nerv.CuMatrixFloat +--Set the matrix, whose size should be size #list * vocab:size() to be one_hot according to the list. null_word will become a zero vector. +function Util.set_onehot(m, list, vocab) + if (m:nrow() ~= #list or m:ncol() ~= vocab:size()) then + nerv.error("size of matrix mismatch with list and vocab") + end + m:fill(0) + for i = 1, #list, 1 do + --index in matrix starts at 0 + if (list[i] ~= vocab.null_token) then + m[i - 1][vocab:get_word_str(list[i]).id - 1] = 1 + end + end + return m +end + +--m: matrix +--list: table, list of string(word) +--vocab: nerv.LMVocab +--Returns: nerv.MMatrixInt +--Set the matrix to be ids of the words, id starting at 1, not 0 +function Util.set_id(m, list, vocab) + if (m:nrow() ~= #list or m:ncol() ~= 1) then + nerv.error("nrow of matrix mismatch with list or its col not one") + end + for i = 1, #list, 1 do + --index in matrix starts at 0 + if (list[i] ~= vocab.null_token) then + m[i - 1][0] = vocab:get_word_str(list[i]).id + else + m[i - 1][0] = 0 + end + end + return m +end + function Util.wait(sec) local start = os.time() repeat until os.time() > start + sec @@ -66,3 +105,29 @@ end function Result:status(cla) return "LMResult status of " .. cla .. ": " .. " " end + +local Timer = nerv.class("nerv.Timer") +function Timer:__init() + self.last = {} + self.rec = {} +end + +function Timer:tic(item) + self.last[item] = os.time() +end + +function Timer:toc(item) + if (self.last[item] == nil) then + nerv.error("item not there") + end + if (self.rec[item] == nil) then + self.rec[item] = 0 + end + self.rec[item] = self.rec[item] + os.difftime(os.time(), self.last[item]) +end + +function Timer:flush() + for key, value in pairs(self.rec) do + self.rec[key] = 0 + end +end diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/main.lua index e797254..8764998 100644 --- a/nerv/examples/lmptb/main.lua +++ b/nerv/examples/lmptb/main.lua @@ -55,12 +55,16 @@ end --Returns: nerv.LayerRepo function prepare_layers(global_conf, paramRepo) printf("%s preparing layers...\n", global_conf.sche_log_pre) - local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_ih"] = "ltp_ih", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent}} + local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}} local layers = { - ["nerv.LMAffineRecurrentLayer"] = { + ["nerv.IndRecurrentLayer"] = { ["recurrentL1"] = recurrentLconfig, }, + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}, + }, + ["nerv.SigmoidLayer"] = { ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} }, @@ -76,8 +80,9 @@ function prepare_layers(global_conf, paramRepo) printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt) for i = 1, global_conf.bptt do - layers["nerv.LMAffineRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig + layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} + layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}} end local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf) printf("%s preparing layers end.\n", global_conf.sche_log_pre) @@ -92,7 +97,7 @@ function prepare_dagLayer(global_conf, layerRepo) --input: input_w, input_w, ... input_w_now, last_activation local dim_in_t = {} - for i = 1, global_conf.bptt + 1 do dim_in_t[i] = global_conf.vocab:size() end + for i = 1, global_conf.bptt + 1 do dim_in_t[i] = 1 end dim_in_t[global_conf.bptt + 2] = global_conf.hidden_size dim_in_t[global_conf.bptt + 3] = global_conf.vocab:size() --[[ softmax @@ -100,21 +105,24 @@ function prepare_dagLayer(global_conf, layerRepo) ouptut i(bptt+3) | recurrentL(bptt+1)... recurrentL2-recurrentL1 + selectL(bptt+1) selectL2 selectL1 / | | | i(bptt+2) i(bptt+1) i2 i1 ]]-- local connections_t = { + ["selectL1[1]"] = "recurrentL1[1]", ["recurrentL1[1]"] = "sigmoidL1[1]", ["sigmoidL1[1]"] = "outputL[1]", ["outputL[1]"] = "softmaxL[1]", ["softmaxL[1]"] = "[1]" } for i = 1, global_conf.bptt, 1 do - connections_t["["..i.."]"] = "recurrentL"..i.."[1]" + connections_t["["..i.."]"] = "selectL"..i.."[1]" + connections_t["selectL"..(i+1).."[1]"] = "recurrentL"..(i+1).."[1]" connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]" connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]" end - connections_t["["..(global_conf.bptt+1).."]"] = "recurrentL"..(global_conf.bptt+1).."[1]" + connections_t["["..(global_conf.bptt+1).."]"] = "selectL"..(global_conf.bptt+1).."[1]" connections_t["["..(global_conf.bptt+2).."]"] = "recurrentL"..(global_conf.bptt+1).."[2]" connections_t["["..(global_conf.bptt+3).."]"] = "softmaxL[2]" printf("%s printing DAG connections:\n", global_conf.sche_log_pre) @@ -130,18 +138,6 @@ function prepare_dagLayer(global_conf, layerRepo) return dagL end ---Returns: table -function create_dag_input(global_conf, token_store, hidden_store, tnow) - local dagL_input = {} - for i = 1, global_conf.bptt + 1 do - dagL_input[i] = nerv.LMUtil.create_onehot(token_store[tnow - i + 1], global_conf.vocab, global_conf.cumat_type) - end - dagL_input[global_conf.bptt + 2] = hidden_store[tnow - global_conf.bptt - 1] - dagL_input[global_conf.bptt + 3] = nerv.LMUtil.create_onehot(token_store[tnow + 1], global_conf.vocab, global_conf.cumat_type) - - return dagL_input -end - --global_conf: table --dagL: nerv.DAGLayer --fn: string @@ -165,32 +161,54 @@ function propagateFile(global_conf, dagL, fn, config) token_store[tnow - i] = {} for j = 1, global_conf.batch_size do token_store[tnow - i][j] = global_conf.vocab.null_token end end - + + local dagL_input = {} + for i = 1, global_conf.bptt + 1 do + dagL_input[i] = nerv.MMatrixInt(global_conf.batch_size, 1) + end + dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) + dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) + local dagL_output = {global_conf.cumat_type(global_conf.batch_size, 1)} local dagL_err = {nil} --{global_conf.cumat_type(global_conf.batch_size, 1)} local dagL_input_err = {} for i = 1, global_conf.bptt + 1 do - dagL_input_err[i] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) + dagL_input_err[i] = nil --global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) end dagL_input_err[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) dagL_input_err[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) local result = nerv.LMResult(global_conf, global_conf.vocab) result:init("rnn") - + + global_conf.input_word_id = {} while (1) do token_store[tnow + 1] = feeder:get_batch() --The next word(to predict) if (token_store[tnow + 1] == nil) then break end - local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow) --dagL:propagate(dagL_input, dagL_output) + for i = 1, global_conf.bptt + 1 do + nerv.LMUtil.set_id(dagL_input[i], token_store[tnow - i + 1], global_conf.vocab) + global_conf.input_word_id["recurrentL"..i] = dagL_input[i] --for IndRecurrent + end + dagL_input[global_conf.bptt + 2]:copy_fromd(hidden_store[tnow - global_conf.bptt - 1]) + nerv.LMUtil.set_onehot(dagL_input[global_conf.bptt + 3], token_store[tnow + 1], global_conf.vocab) + + --local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow) + global_conf.timer:tic("dagL-propagate") + dagL:propagate(dagL_input, dagL_output) + global_conf.timer:toc("dagL-propagate") hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1]) if (config.do_train == true) then - --dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output) - --dagL:update(dagL_err, dagL_input, dagL_output) + global_conf.timer:tic("dagL-back_propagate") + dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output) + global_conf.timer:toc("dagL-back_propagate") + global_conf.timer:tic("dagL-update") + dagL:update(dagL_err, dagL_input, dagL_output) + global_conf.timer:toc("dagL-update") end for i = 1, global_conf.batch_size, 1 do @@ -201,13 +219,20 @@ function propagateFile(global_conf, dagL, fn, config) end end if (result["rnn"].cn_w % global_conf.log_w_num == 0) then - printf("%s %d words processed.\n", global_conf.sche_log_pre, result["rnn"].cn_w) + printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date()) + for key, value in pairs(global_conf.timer.rec) do + printf("\t [global_conf.timer]: time spent on %s:%.5fs\n", key, value) + end + global_conf.timer:flush() + --nerv.CuMatrix.print_profile() + --nerv.CuMatrix.clear_profile() end end token_store[tnow - 2 - global_conf.bptt] = nil hidden_store[tnow - 2 - global_conf.bptt] = nil - collectgarbage("collect") + collectgarbage("collect") + tnow = tnow + 1 end @@ -230,19 +255,19 @@ end local set = "ptb" if (set == "ptb") then - train_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt" - valid_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.valid.txt" - test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.test.txt" - work_dir_base = "/home/slhome/txh18/workspace/nerv-project/lmptb-work" + train_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt" + valid_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt" + test_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt" + work_dir_base = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work" global_conf = { - lrate = 0.1, wcost = 1e-6, momentum = 0, + lrate = 1, wcost = 1e-6, momentum = 0, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.CuMatrixFloat, - hidden_size = 100, + hidden_size = 200, batch_size = 10, - bptt = 3, --train bptt_block's words. could be set to zero - max_iter = 15, + bptt = 6, --train bptt_block's words. could be set to zero + max_iter = 18, param_random = function() return (math.random() / 5 - 0.1) end, independent = true, @@ -250,15 +275,16 @@ if (set == "ptb") then valid_fn = valid_fn, test_fn = test_fn, sche_log_pre = "[SCHEDULER]:", - log_w_num = 100000, --give a message when log_w_num words have been processed + log_w_num = 50000, --give a message when log_w_num words have been processed + timer = nerv.Timer() } global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate..os.date("_%bD%dH%H") global_conf.param_fn = global_conf.work_dir.."/params" elseif (set == "test") then - train_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - valid_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - test_fn = "/home/slhome/txh18/workspace/nerv-project/some-text" - work_dir = "/home/slhome/txh18/workspace/nerv-project/lmptb-work-play" + train_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + valid_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + test_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text" + work_dir = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work-play" global_conf = { lrate = 0.1, wcost = 1e-6, momentum = 0, cumat_type = nerv.CuMatrixFloat, @@ -266,7 +292,7 @@ elseif (set == "test") then hidden_size = 5, batch_size = 1, - bptt = 1, --train bptt_block's words. could be set to zero + bptt = 0, --train bptt_block's words. could be set to zero max_iter = 15, param_random = function() return (math.random() / 5 - 0.1) end, independent = true, @@ -279,6 +305,7 @@ elseif (set == "test") then sche_log_pre = "[SCHEDULER]:", log_w_num = 80000, --give a message when log_w_num words have been processed + timer = nerv.Timer() } end @@ -298,8 +325,11 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ dagL, paramRepo = load_net(global_conf) \ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \ ppl_rec = {} \ + lr_rec = {} \ ppl_rec[0] = result:ppl_net(\"rnn\") ppl_last = ppl_rec[0] \ + lr_rec[0] = 0 \ printf(\"\\n\") \ + local lr_half = false \ for iter = 1, global_conf.max_iter, 1 do \ printf(\"===ITERATION %d LR %f===\\n\", iter, global_conf.lrate) \ global_conf.sche_log_pre = \"[SCHEDULER ITER\"..iter..\" LR\"..global_conf.lrate..\"]:\" \ @@ -308,8 +338,10 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ printf(\"===VALIDATION %d===\\n\", iter) \ local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \ ppl_rec[iter] = result:ppl_net(\"rnn\") \ - if (ppl_last / ppl_rec[iter] < 1.03) then \ + lr_rec[iter] = global_conf.lrate \ + if (ppl_last / ppl_rec[iter] < 1.03 or lr_half == true) then \ global_conf.lrate = (global_conf.lrate / 2) \ + lr_half = true \ end \ if (ppl_rec[iter] < ppl_last) then \ printf(\"%s saving net to file %s...\\n\", global_conf.sche_log_pre, global_conf.param_fn) \ @@ -322,7 +354,7 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \ nerv.LMUtil.wait(2) \ end \ printf(\"===VALIDATION PPL record===\\n\") \ - for i = 0, #ppl_rec do printf(\"<%d: %.2f> \", i, ppl_rec[i]) end \ + for i = 0, #ppl_rec do printf(\" \", i, lr_rec[i], ppl_rec[i]) end \ printf(\"\\n\") \ printf(\"===FINAL TEST===\\n\") \ global_conf.sche_log_pre = \"[SCHEDULER FINAL_TEST]:\" \ @@ -339,3 +371,5 @@ global_conf.vocab:build_file(global_conf.train_fn) prepare_parameters(global_conf, true) assert(loadstring(scheduler))() + + diff --git a/nerv/examples/lmptb/nerv b/nerv/examples/lmptb/nerv index 7b3b879..8829556 100755 --- a/nerv/examples/lmptb/nerv +++ b/nerv/examples/lmptb/nerv @@ -1,3 +1,3 @@ #!/bin/sh -exec '/home/slhome/txh18/workspace/nerv-project/nerv/install/bin/luajit' -e 'package.path="/home/slhome/txh18/.luarocks//share/lua/5.1/?.lua;/home/slhome/txh18/.luarocks//share/lua/5.1/?/init.lua;/home/slhome/txh18/workspace/nerv-project/nerv/install/share/lua/5.1/?.lua;/home/slhome/txh18/workspace/nerv-project/nerv/install/share/lua/5.1/?/init.lua;"..package.path; package.cpath="/home/slhome/txh18/.luarocks//lib/lua/5.1/?.so;/home/slhome/txh18/workspace/nerv-project/nerv/install/lib/lua/5.1/?.so;"..package.cpath' -e 'local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1")' '/home/slhome/txh18/workspace/nerv-project/nerv/install/lib/luarocks/rocks/nerv/scm-1/bin/nerv' "$@" +exec '/home/slhome/txh18/workspace/nerv-project/nerv-1/install/bin/luajit' -e 'package.path="/home/slhome/txh18/.luarocks/share/lua/5.1/?.lua;/home/slhome/txh18/.luarocks/share/lua/5.1/?/init.lua;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/share/lua/5.1/?.lua;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/share/lua/5.1/?/init.lua;"..package.path; package.cpath="/home/slhome/txh18/.luarocks/lib/lua/5.1/?.so;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/lib/lua/5.1/?.so;"..package.cpath' -e 'local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1")' '/home/slhome/txh18/workspace/nerv-project/nerv-1/install/lib/luarocks/rocks/nerv/scm-1/bin/nerv' "$@" -- cgit v1.2.3-70-g09d2