aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--nerv/examples/lmptb/lm_trainer.lua15
-rw-r--r--nerv/examples/lmptb/rnn/tnn.lua28
-rw-r--r--nerv/examples/lmptb/tnn_ptb_main.lua19
3 files changed, 29 insertions, 33 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
index 44862dc..7dd70e2 100644
--- a/nerv/examples/lmptb/lm_trainer.lua
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -26,13 +26,15 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
local r, feeds
- r, feeds = tnn:getFeedFromReader(reader)
- if (r == false) then break end
+ r, feeds = tnn:getfeed_from_reader(reader)
+ if r == false then
+ break
+ end
for t = 1, global_conf.chunk_size do
tnn.err_inputs_m[t][1]:fill(1)
for i = 1, global_conf.batch_size do
- if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
+ if bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0 then
tnn.err_inputs_m[t][1][i - 1][0] = 0
end
end
@@ -50,7 +52,7 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
tnn:net_propagate()
- if (do_train == true) then
+ if do_train == true then
tnn:net_backpropagate(false)
tnn:net_backpropagate(true)
end
@@ -62,12 +64,11 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
end
end
end
-
- tnn:moveRightToNextMB()
+ tnn:move_right_to_nextmb()
global_conf.timer:toc('most_out_loop_lmprocessfile')
--print log
- if (result["rnn"].cn_w > next_log_wcn) then
+ if result["rnn"].cn_w > next_log_wcn then
next_log_wcn = next_log_wcn + global_conf.log_w_num
printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
diff --git a/nerv/examples/lmptb/rnn/tnn.lua b/nerv/examples/lmptb/rnn/tnn.lua
index d6bf42e..d10ab82 100644
--- a/nerv/examples/lmptb/rnn/tnn.lua
+++ b/nerv/examples/lmptb/rnn/tnn.lua
@@ -58,7 +58,7 @@ nerv.TNN.FC.HAS_INPUT = 1
nerv.TNN.FC.HAS_LABEL = 2
nerv.TNN.FC.SEQ_NORM = bit.bor(nerv.TNN.FC.HAS_INPUT, nerv.TNN.FC.HAS_LABEL) --This instance have both input and label
-function TNN.makeInitialStore(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c)
+function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c)
--Return a table of matrix storage from time (1-chunk_size)..(2*chunk_size)
if (type(st) ~= "table") then
nerv.error("st should be a table")
@@ -78,7 +78,7 @@ function TNN.makeInitialStore(st, p, dim, batch_size, chunk_size, global_conf, s
end
end
-function TNN:outOfFeedRange(t) --out of chunk, or no input, for the current feed
+function TNN:out_of_feedrange(t) --out of chunk, or no input, for the current feed
if (t < 1 or t > self.chunk_size) then
return true
end
@@ -165,9 +165,9 @@ function TNN:init(batch_size, chunk_size)
print("TNN initing storage", ref_from.layer.id, "->", ref_to.layer.id)
ref_to.inputs_matbak_p[port_to] = self.gconf.cumat_type(batch_size, dim)
- self.makeInitialStore(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time)
+ self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time)
ref_from.err_inputs_matbak_p[port_from] = self.gconf.cumat_type(batch_size, dim)
- self.makeInitialStore(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time)
+ self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time)
end
@@ -176,8 +176,8 @@ function TNN:init(batch_size, chunk_size)
for i = 1, #self.dim_out do --Init storage for output ports
local ref = self.outputs_p[i].ref
local p = self.outputs_p[i].port
- self.makeInitialStore(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0)
- self.makeInitialStore(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0)
+ self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0)
+ self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0)
end
self.inputs_m = {}
@@ -185,8 +185,8 @@ function TNN:init(batch_size, chunk_size)
for i = 1, #self.dim_in do --Init storage for input ports
local ref = self.inputs_p[i].ref
local p = self.inputs_p[i].port
- self.makeInitialStore(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0)
- self.makeInitialStore(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0)
+ self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0)
+ self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0)
end
for id, ref in pairs(self.layers) do --Calling init for child layers
@@ -285,13 +285,13 @@ end
--reader: a reader object that provides get_batch(feeds)
--Returns: bool, whether the reader produced a new feed
--Returns: feeds, the table (self.feeds_now) that is filled with the reader's feeds
-function TNN:getFeedFromReader(reader)
+function TNN:getfeed_from_reader(reader)
local feeds_now = self.feeds_now
local got_new = reader:get_batch(feeds_now)
return got_new, feeds_now
end
-function TNN:moveRightToNextMB() --move output history activations of 1..chunk_size to 1-chunk_size..0
+function TNN:move_right_to_nextmb() --move output history activations of 1..chunk_size to 1-chunk_size..0
for t = 1, self.chunk_size, 1 do
for id, ref in pairs(self.layers) do
for p = 1, #ref.dim_out do
@@ -345,7 +345,7 @@ end
--ref: the TNN_ref of a layer
--t: the current time to propagate
function TNN:propagate_dfs(ref, t)
- if (self:outOfFeedRange(t)) then
+ if (self:out_of_feedrange(t)) then
return
end
if (ref.outputs_b[t][1] == true) then --already propagated, 1 is just a random port
@@ -357,7 +357,7 @@ function TNN:propagate_dfs(ref, t)
local flag = true --whether have all inputs
for _, conn in pairs(ref.i_conns_p) do
local p = conn.dst.port
- if (not (ref.inputs_b[t][p] or self:outOfFeedRange(t - conn.time))) then
+ if (not (ref.inputs_b[t][p] or self:out_of_feedrange(t - conn.time))) then
flag = false
break
end
@@ -465,7 +465,7 @@ end
--ref: the TNN_ref of a layer
--t: the current time to propagate
function TNN:backpropagate_dfs(ref, t, do_update)
- if (self:outOfFeedRange(t)) then
+ if (self:out_of_feedrange(t)) then
return
end
if (ref.err_outputs_b[t][1] == true) then --already back_propagated, 1 is just a random port
@@ -477,7 +477,7 @@ function TNN:backpropagate_dfs(ref, t, do_update)
local flag = true --whether have all inputs
for _, conn in pairs(ref.o_conns_p) do
local p = conn.src.port
- if (not (ref.err_inputs_b[t][p] or self:outOfFeedRange(t + conn.time))) then
+ if (not (ref.err_inputs_b[t][p] or self:out_of_feedrange(t + conn.time))) then
flag = false
break
end
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index f68311c..c37b217 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -17,7 +17,7 @@ local LMTrainer = nerv.LMTrainer
function prepare_parameters(global_conf, iter)
printf("%s preparing parameters...\n", global_conf.sche_log_pre)
- if (iter == -1) then --first time
+ if iter == -1 then --first time
printf("%s first time, generating parameters...\n", global_conf.sche_log_pre)
ltp_ih = nerv.LinearTransParam("ltp_ih", global_conf)
ltp_ih.trans = global_conf.cumat_type(global_conf.vocab:size(), global_conf.hidden_size) --index 0 is for zero, others correspond to vocab index(starting from 1)
@@ -290,9 +290,11 @@ printf("%s building vocab...\n", global_conf.sche_log_pre)
global_conf.vocab:build_file(global_conf.vocab_fn, false)
ppl_rec = {}
-if (start_iter == -1) then
+if start_iter == -1 then
prepare_parameters(global_conf, -1) --randomly generate parameters
+end
+if start_iter == -1 or start_iter == 0 then
print("===INITIAL VALIDATION===")
local tnn, paramRepo = load_net(global_conf, 0)
local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
@@ -309,9 +311,6 @@ if (start_iter == -1) then
print()
end
-if (start_iter == 0) then
- nerv.error("start_iter should not be zero")
-end
local final_iter
for iter = start_iter, global_conf.max_iter, 1 do
final_iter = iter --for final testing
@@ -335,21 +334,17 @@ for iter = start_iter, global_conf.max_iter, 1 do
if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then
global_conf.lrate = (global_conf.lrate * 0.6)
end
- if (ppl_rec[iter].valid < ppl_last) then
+ if ppl_rec[iter].valid < ppl_last then
printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter)
paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil)
else
printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre)
os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter))
- --if (lr_half == true) then
- -- printf("%s LR is already halfing, end training...\n", global_conf.sche_log_pre)
- -- break
- --end
end
- if (ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) then
+ if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then
lr_half = true
end
- if (ppl_rec[iter].valid < ppl_last) then
+ if ppl_rec[iter].valid < ppl_last then
ppl_last = ppl_rec[iter].valid
end
printf("\n")