summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcloudygoose <[email protected]>2015-08-11 11:48:51 +0800
committercloudygoose <[email protected]>2015-08-11 11:48:51 +0800
commit9cb04041b1f1aabfd67480140caa56325b95b9ad (patch)
treebccbce2335f90afb3362839c3b2d3e5b12911667
parent88c49ed196b90d1f4f0e9562212d36509f218dc8 (diff)
Last port is wrong
-rw-r--r--nerv/examples/lmptb/lmptb/layer/init.lua4
-rw-r--r--nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua6
-rw-r--r--nerv/examples/lmptb/lmptb/lmutil.lua65
-rw-r--r--nerv/examples/lmptb/main.lua118
-rwxr-xr-xnerv/examples/lmptb/nerv2
5 files changed, 147 insertions, 48 deletions
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index b3b00f6..ff29126 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,5 +1,5 @@
-require 'lmptb.layer.affine_recurrent'
+require 'lmptb.layer.select_linear'
+--require 'lmptb.layer.affine_recurrent'
require 'lmptb.layer.lm_affine_recurrent'
-
diff --git a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
index f1eb4a1..c43e567 100644
--- a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
+++ b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
@@ -1,4 +1,4 @@
-local LMRecurrent = nerv.class('nerv.LMAffineRecurrentLayer', 'nerv.AffineRecurrentLayer') --breaks at sentence end, when </s> is met, input will be set to zero
+local LMRecurrent = nerv.class('nerv.IndRecurrentLayer', 'nerv.AffineRecurrentLayer') --breaks at sentence end, when </s> is met, input will be set to zero
--id: string
--global_conf: table
@@ -11,10 +11,10 @@ function LMRecurrent:__init(id, global_conf, layer_conf)
end
function LMRecurrent:propagate(input, output)
- output[1]:mul(input[1], self.ltp_ih.trans, 1.0, 0.0, 'N', 'N')
+ output[1]:copy_fromd(input[1])
if (self.independent == true) then
for i = 1, input[1]:nrow() do
- if (input[1][i - 1][self.break_id - 1] > 0.1) then --here is sentence break
+ if (self.gconf.input_word_id[self.id][i - 1][0] == self.break_id) then --here is sentence break
input[2][i - 1]:fill(0)
end
end
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index c15c637..73cf041 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -17,6 +17,45 @@ function Util.create_onehot(list, vocab, ty)
return m
end
+--m: matrix to overwrite in place; must have #list rows and vocab:size() columns
+--list: table, list of string(word)
+--vocab: nerv.LMVocab
+--Returns: m itself (main.lua passes a global_conf.cumat_type matrix)
+--Fills m with zeros, then puts a single 1 in each row at the column of that row's word; a row whose word is vocab.null_token stays all zero. Calls nerv.error on a size mismatch.
+function Util.set_onehot(m, list, vocab)
+ if (m:nrow() ~= #list or m:ncol() ~= vocab:size()) then
+ nerv.error("size of matrix mismatch with list and vocab")
+ end
+ m:fill(0)
+ for i = 1, #list, 1 do
+ --matrix rows and columns are indexed from 0, list positions from 1, hence the -1 offsets
+ if (list[i] ~= vocab.null_token) then
+ m[i - 1][vocab:get_word_str(list[i]).id - 1] = 1
+ end
+ end
+ return m
+end
+
+--m: matrix to overwrite in place; must have #list rows and exactly one column
+--list: table, list of string(word)
+--vocab: nerv.LMVocab
+--Returns: m itself (main.lua passes a nerv.MMatrixInt)
+--Writes the vocab id of list[i] into m[i - 1][0]; ids start at 1, not 0, so the 0 written for vocab.null_token cannot collide with a real word. Calls nerv.error on a size mismatch.
+function Util.set_id(m, list, vocab)
+ if (m:nrow() ~= #list or m:ncol() ~= 1) then
+ nerv.error("nrow of matrix mismatch with list or its col not one")
+ end
+ for i = 1, #list, 1 do
+ --matrix rows and columns are indexed from 0, list positions from 1
+ if (list[i] ~= vocab.null_token) then
+ m[i - 1][0] = vocab:get_word_str(list[i]).id
+ else
+ m[i - 1][0] = 0
+ end
+ end
+ return m
+end
+
function Util.wait(sec)
local start = os.time()
repeat until os.time() > start + sec
@@ -66,3 +105,29 @@ end
function Result:status(cla)
return "LMResult status of " .. cla .. ": " .. "<SEN_CN " .. self[cla].cn_sen .. "> <W_CN " .. self[cla].cn_w .. "> <PPL_NET " .. self:ppl_net(cla) .. "> <PPL_OOV " .. self:ppl_all(cla) .. "> <LOGP " .. self[cla].logp_all .. ">"
end
+
+local Timer = nerv.class("nerv.Timer") --accumulates elapsed time per named item between tic/toc pairs
+function Timer:__init()
+ self.last = {} --item -> os.time() value stored by the most recent tic(item)
+ self.rec = {} --item -> total seconds added by toc(item) since the last flush
+end
+
+function Timer:tic(item) --marks the start of a measurement for item
+ self.last[item] = os.time()
+end
+
+function Timer:toc(item) --adds the time elapsed since the last tic(item) to rec[item]
+ if (self.last[item] == nil) then --toc called without any earlier tic for this item
+ nerv.error("item not there")
+ end
+ if (self.rec[item] == nil) then --first measurement of this item: start its total at 0
+ self.rec[item] = 0
+ end
+ self.rec[item] = self.rec[item] + os.difftime(os.time(), self.last[item]) --NOTE(review): os.time() usually has whole-second resolution, so short intervals may add 0 - confirm
+end
+
+function Timer:flush() --resets every accumulated total to 0; item keys and last tic marks are kept
+ for key, value in pairs(self.rec) do
+ self.rec[key] = 0
+ end
+end
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/main.lua
index e797254..8764998 100644
--- a/nerv/examples/lmptb/main.lua
+++ b/nerv/examples/lmptb/main.lua
@@ -55,12 +55,16 @@ end
--Returns: nerv.LayerRepo
function prepare_layers(global_conf, paramRepo)
printf("%s preparing layers...\n", global_conf.sche_log_pre)
- local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_ih"] = "ltp_ih", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent}}
+ local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
local layers = {
- ["nerv.LMAffineRecurrentLayer"] = {
+ ["nerv.IndRecurrentLayer"] = {
["recurrentL1"] = recurrentLconfig,
},
+ ["nerv.SelectLinearLayer"] = {
+ ["selectL1"] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}},
+ },
+
["nerv.SigmoidLayer"] = {
["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
},
@@ -76,8 +80,9 @@ function prepare_layers(global_conf, paramRepo)
printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
for i = 1, global_conf.bptt do
- layers["nerv.LMAffineRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
+ layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+ layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}
end
local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
printf("%s preparing layers end.\n", global_conf.sche_log_pre)
@@ -92,7 +97,7 @@ function prepare_dagLayer(global_conf, layerRepo)
--input: input_w, input_w, ... input_w_now, last_activation
local dim_in_t = {}
- for i = 1, global_conf.bptt + 1 do dim_in_t[i] = global_conf.vocab:size() end
+ for i = 1, global_conf.bptt + 1 do dim_in_t[i] = 1 end
dim_in_t[global_conf.bptt + 2] = global_conf.hidden_size
dim_in_t[global_conf.bptt + 3] = global_conf.vocab:size()
--[[ softmax
@@ -100,21 +105,24 @@ function prepare_dagLayer(global_conf, layerRepo)
ouptut i(bptt+3)
|
recurrentL(bptt+1)... recurrentL2-recurrentL1
+ selectL(bptt+1) selectL2 selectL1
/ | | |
i(bptt+2) i(bptt+1) i2 i1
]]--
local connections_t = {
+ ["selectL1[1]"] = "recurrentL1[1]",
["recurrentL1[1]"] = "sigmoidL1[1]",
["sigmoidL1[1]"] = "outputL[1]",
["outputL[1]"] = "softmaxL[1]",
["softmaxL[1]"] = "<output>[1]"
}
for i = 1, global_conf.bptt, 1 do
- connections_t["<input>["..i.."]"] = "recurrentL"..i.."[1]"
+ connections_t["<input>["..i.."]"] = "selectL"..i.."[1]"
+ connections_t["selectL"..(i+1).."[1]"] = "recurrentL"..(i+1).."[1]"
connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]"
connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]"
end
- connections_t["<input>["..(global_conf.bptt+1).."]"] = "recurrentL"..(global_conf.bptt+1).."[1]"
+ connections_t["<input>["..(global_conf.bptt+1).."]"] = "selectL"..(global_conf.bptt+1).."[1]"
connections_t["<input>["..(global_conf.bptt+2).."]"] = "recurrentL"..(global_conf.bptt+1).."[2]"
connections_t["<input>["..(global_conf.bptt+3).."]"] = "softmaxL[2]"
printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
@@ -130,18 +138,6 @@ function prepare_dagLayer(global_conf, layerRepo)
return dagL
end
---Returns: table
-function create_dag_input(global_conf, token_store, hidden_store, tnow)
- local dagL_input = {}
- for i = 1, global_conf.bptt + 1 do
- dagL_input[i] = nerv.LMUtil.create_onehot(token_store[tnow - i + 1], global_conf.vocab, global_conf.cumat_type)
- end
- dagL_input[global_conf.bptt + 2] = hidden_store[tnow - global_conf.bptt - 1]
- dagL_input[global_conf.bptt + 3] = nerv.LMUtil.create_onehot(token_store[tnow + 1], global_conf.vocab, global_conf.cumat_type)
-
- return dagL_input
-end
-
--global_conf: table
--dagL: nerv.DAGLayer
--fn: string
@@ -165,32 +161,54 @@ function propagateFile(global_conf, dagL, fn, config)
token_store[tnow - i] = {}
for j = 1, global_conf.batch_size do token_store[tnow - i][j] = global_conf.vocab.null_token end
end
-
+
+ local dagL_input = {}
+ for i = 1, global_conf.bptt + 1 do
+ dagL_input[i] = nerv.MMatrixInt(global_conf.batch_size, 1)
+ end
+ dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+ dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
+
local dagL_output = {global_conf.cumat_type(global_conf.batch_size, 1)}
local dagL_err = {nil} --{global_conf.cumat_type(global_conf.batch_size, 1)}
local dagL_input_err = {}
for i = 1, global_conf.bptt + 1 do
- dagL_input_err[i] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
+ dagL_input_err[i] = nil --global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
end
dagL_input_err[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
dagL_input_err[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
local result = nerv.LMResult(global_conf, global_conf.vocab)
result:init("rnn")
-
+
+ global_conf.input_word_id = {}
while (1) do
token_store[tnow + 1] = feeder:get_batch() --The next word(to predict)
if (token_store[tnow + 1] == nil) then break end
- local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow)
--dagL:propagate(dagL_input, dagL_output)
+ for i = 1, global_conf.bptt + 1 do
+ nerv.LMUtil.set_id(dagL_input[i], token_store[tnow - i + 1], global_conf.vocab)
+ global_conf.input_word_id["recurrentL"..i] = dagL_input[i] --for IndRecurrent
+ end
+ dagL_input[global_conf.bptt + 2]:copy_fromd(hidden_store[tnow - global_conf.bptt - 1])
+ nerv.LMUtil.set_onehot(dagL_input[global_conf.bptt + 3], token_store[tnow + 1], global_conf.vocab)
+
+ --local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow)
+ global_conf.timer:tic("dagL-propagate")
+ dagL:propagate(dagL_input, dagL_output)
+ global_conf.timer:toc("dagL-propagate")
hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
if (config.do_train == true) then
- --dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output)
- --dagL:update(dagL_err, dagL_input, dagL_output)
+ global_conf.timer:tic("dagL-back_propagate")
+ dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output)
+ global_conf.timer:toc("dagL-back_propagate")
+ global_conf.timer:tic("dagL-update")
+ dagL:update(dagL_err, dagL_input, dagL_output)
+ global_conf.timer:toc("dagL-update")
end
for i = 1, global_conf.batch_size, 1 do
@@ -201,13 +219,20 @@ function propagateFile(global_conf, dagL, fn, config)
end
end
if (result["rnn"].cn_w % global_conf.log_w_num == 0) then
- printf("%s %d words processed.\n", global_conf.sche_log_pre, result["rnn"].cn_w)
+ printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+ for key, value in pairs(global_conf.timer.rec) do
+ printf("\t [global_conf.timer]: time spent on %s:%.5fs\n", key, value)
+ end
+ global_conf.timer:flush()
+ --nerv.CuMatrix.print_profile()
+ --nerv.CuMatrix.clear_profile()
end
end
token_store[tnow - 2 - global_conf.bptt] = nil
hidden_store[tnow - 2 - global_conf.bptt] = nil
- collectgarbage("collect")
+ collectgarbage("collect")
+
tnow = tnow + 1
end
@@ -230,19 +255,19 @@ end
local set = "ptb"
if (set == "ptb") then
- train_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt"
- valid_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.valid.txt"
- test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.test.txt"
- work_dir_base = "/home/slhome/txh18/workspace/nerv-project/lmptb-work"
+ train_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt"
+ valid_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt"
+ test_fn = "/slfs1/users/txh18/workspace/nerv-project/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt"
+ work_dir_base = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work"
global_conf = {
- lrate = 0.1, wcost = 1e-6, momentum = 0,
+ lrate = 1, wcost = 1e-6, momentum = 0,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.CuMatrixFloat,
- hidden_size = 100,
+ hidden_size = 200,
batch_size = 10,
- bptt = 3, --train bptt_block's words. could be set to zero
- max_iter = 15,
+ bptt = 6, --train bptt_block's words. could be set to zero
+ max_iter = 18,
param_random = function() return (math.random() / 5 - 0.1) end,
independent = true,
@@ -250,15 +275,16 @@ if (set == "ptb") then
valid_fn = valid_fn,
test_fn = test_fn,
sche_log_pre = "[SCHEDULER]:",
- log_w_num = 100000, --give a message when log_w_num words have been processed
+ log_w_num = 50000, --give a message when log_w_num words have been processed
+ timer = nerv.Timer()
}
global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate..os.date("_%bD%dH%H")
global_conf.param_fn = global_conf.work_dir.."/params"
elseif (set == "test") then
- train_fn = "/home/slhome/txh18/workspace/nerv-project/some-text"
- valid_fn = "/home/slhome/txh18/workspace/nerv-project/some-text"
- test_fn = "/home/slhome/txh18/workspace/nerv-project/some-text"
- work_dir = "/home/slhome/txh18/workspace/nerv-project/lmptb-work-play"
+ train_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+ valid_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+ test_fn = "/slfs1/users/txh18/workspace/nerv-project/some-text"
+ work_dir = "/slfs1/users/txh18/workspace/nerv-project/lmptb-work-play"
global_conf = {
lrate = 0.1, wcost = 1e-6, momentum = 0,
cumat_type = nerv.CuMatrixFloat,
@@ -266,7 +292,7 @@ elseif (set == "test") then
hidden_size = 5,
batch_size = 1,
- bptt = 1, --train bptt_block's words. could be set to zero
+ bptt = 0, --train bptt_block's words. could be set to zero
max_iter = 15,
param_random = function() return (math.random() / 5 - 0.1) end,
independent = true,
@@ -279,6 +305,7 @@ elseif (set == "test") then
sche_log_pre = "[SCHEDULER]:",
log_w_num = 80000, --give a message when log_w_num words have been processed
+ timer = nerv.Timer()
}
end
@@ -298,8 +325,11 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \
dagL, paramRepo = load_net(global_conf) \
local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
ppl_rec = {} \
+ lr_rec = {} \
ppl_rec[0] = result:ppl_net(\"rnn\") ppl_last = ppl_rec[0] \
+ lr_rec[0] = 0 \
printf(\"\\n\") \
+ local lr_half = false \
for iter = 1, global_conf.max_iter, 1 do \
printf(\"===ITERATION %d LR %f===\\n\", iter, global_conf.lrate) \
global_conf.sche_log_pre = \"[SCHEDULER ITER\"..iter..\" LR\"..global_conf.lrate..\"]:\" \
@@ -308,8 +338,10 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \
printf(\"===VALIDATION %d===\\n\", iter) \
local result = propagateFile(global_conf, dagL, global_conf.valid_fn, {do_train = false, report_word = false}) \
ppl_rec[iter] = result:ppl_net(\"rnn\") \
- if (ppl_last / ppl_rec[iter] < 1.03) then \
+ lr_rec[iter] = global_conf.lrate \
+ if (ppl_last / ppl_rec[iter] < 1.03 or lr_half == true) then \
global_conf.lrate = (global_conf.lrate / 2) \
+ lr_half = true \
end \
if (ppl_rec[iter] < ppl_last) then \
printf(\"%s saving net to file %s...\\n\", global_conf.sche_log_pre, global_conf.param_fn) \
@@ -322,7 +354,7 @@ scheduler = " printf(\"===INITIAL VALIDATION===\\n\") \
nerv.LMUtil.wait(2) \
end \
printf(\"===VALIDATION PPL record===\\n\") \
- for i = 0, #ppl_rec do printf(\"<%d: %.2f> \", i, ppl_rec[i]) end \
+ for i = 0, #ppl_rec do printf(\"<ITER%d LR%.5f: %.3f> \", i, lr_rec[i], ppl_rec[i]) end \
printf(\"\\n\") \
printf(\"===FINAL TEST===\\n\") \
global_conf.sche_log_pre = \"[SCHEDULER FINAL_TEST]:\" \
@@ -339,3 +371,5 @@ global_conf.vocab:build_file(global_conf.train_fn)
prepare_parameters(global_conf, true)
assert(loadstring(scheduler))()
+
+
diff --git a/nerv/examples/lmptb/nerv b/nerv/examples/lmptb/nerv
index 7b3b879..8829556 100755
--- a/nerv/examples/lmptb/nerv
+++ b/nerv/examples/lmptb/nerv
@@ -1,3 +1,3 @@
#!/bin/sh
-exec '/home/slhome/txh18/workspace/nerv-project/nerv/install/bin/luajit' -e 'package.path="/home/slhome/txh18/.luarocks//share/lua/5.1/?.lua;/home/slhome/txh18/.luarocks//share/lua/5.1/?/init.lua;/home/slhome/txh18/workspace/nerv-project/nerv/install/share/lua/5.1/?.lua;/home/slhome/txh18/workspace/nerv-project/nerv/install/share/lua/5.1/?/init.lua;"..package.path; package.cpath="/home/slhome/txh18/.luarocks//lib/lua/5.1/?.so;/home/slhome/txh18/workspace/nerv-project/nerv/install/lib/lua/5.1/?.so;"..package.cpath' -e 'local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1")' '/home/slhome/txh18/workspace/nerv-project/nerv/install/lib/luarocks/rocks/nerv/scm-1/bin/nerv' "$@"
+exec '/home/slhome/txh18/workspace/nerv-project/nerv-1/install/bin/luajit' -e 'package.path="/home/slhome/txh18/.luarocks/share/lua/5.1/?.lua;/home/slhome/txh18/.luarocks/share/lua/5.1/?/init.lua;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/share/lua/5.1/?.lua;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/share/lua/5.1/?/init.lua;"..package.path; package.cpath="/home/slhome/txh18/.luarocks/lib/lua/5.1/?.so;/home/slhome/txh18/workspace/nerv-project/nerv-1/install/lib/lua/5.1/?.so;"..package.cpath' -e 'local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1")' '/home/slhome/txh18/workspace/nerv-project/nerv-1/install/lib/luarocks/rocks/nerv/scm-1/bin/nerv' "$@"