diff options
-rw-r--r-- | nerv/examples/lmptb/lm_trainer.lua | 15 | ||||
-rw-r--r-- | nerv/examples/lmptb/rnn/tnn.lua | 28 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn_ptb_main.lua | 19 |
3 files changed, 29 insertions, 33 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua index 44862dc..7dd70e2 100644 --- a/nerv/examples/lmptb/lm_trainer.lua +++ b/nerv/examples/lmptb/lm_trainer.lua @@ -26,13 +26,15 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train) local r, feeds - r, feeds = tnn:getFeedFromReader(reader) - if (r == false) then break end + r, feeds = tnn:getfeed_from_reader(reader) + if r == false then + break + end for t = 1, global_conf.chunk_size do tnn.err_inputs_m[t][1]:fill(1) for i = 1, global_conf.batch_size do - if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then + if bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0 then tnn.err_inputs_m[t][1][i - 1][0] = 0 end end @@ -50,7 +52,7 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train) tnn:net_propagate() - if (do_train == true) then + if do_train == true then tnn:net_backpropagate(false) tnn:net_backpropagate(true) end @@ -62,12 +64,11 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train) end end end - - tnn:moveRightToNextMB() + tnn:move_right_to_nextmb() global_conf.timer:toc('most_out_loop_lmprocessfile') --print log - if (result["rnn"].cn_w > next_log_wcn) then + if result["rnn"].cn_w > next_log_wcn then next_log_wcn = next_log_wcn + global_conf.log_w_num printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date()) printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn")) diff --git a/nerv/examples/lmptb/rnn/tnn.lua b/nerv/examples/lmptb/rnn/tnn.lua index d6bf42e..d10ab82 100644 --- a/nerv/examples/lmptb/rnn/tnn.lua +++ b/nerv/examples/lmptb/rnn/tnn.lua @@ -58,7 +58,7 @@ nerv.TNN.FC.HAS_INPUT = 1 nerv.TNN.FC.HAS_LABEL = 2 nerv.TNN.FC.SEQ_NORM = bit.bor(nerv.TNN.FC.HAS_INPUT, nerv.TNN.FC.HAS_LABEL) --This instance have both input and label -function TNN.makeInitialStore(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c) +function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c) --Return a table of matrix storage from time (1-chunk_size)..(2*chunk_size) if (type(st) ~= "table") then nerv.error("st should be a table") @@ -78,7 +78,7 @@ function TNN.makeInitialStore(st, p, dim, batch_size, chunk_size, global_conf, s end end -function TNN:outOfFeedRange(t) --out of chunk, or no input, for the current feed +function TNN:out_of_feedrange(t) --out of chunk, or no input, for the current feed if (t < 1 or t > self.chunk_size) then return true end @@ -165,9 +165,9 @@ function TNN:init(batch_size, chunk_size) print("TNN initing storage", ref_from.layer.id, "->", ref_to.layer.id) ref_to.inputs_matbak_p[port_to] = self.gconf.cumat_type(batch_size, dim) - self.makeInitialStore(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time) + self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time) ref_from.err_inputs_matbak_p[port_from] = self.gconf.cumat_type(batch_size, dim) - self.makeInitialStore(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time) + self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time) end @@ -176,8 +176,8 @@ function TNN:init(batch_size, chunk_size) for i = 1, #self.dim_out do --Init storage for output ports local ref = self.outputs_p[i].ref local p = self.outputs_p[i].port - self.makeInitialStore(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0) - self.makeInitialStore(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0) + self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0) + self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0) end self.inputs_m = {} @@ -185,8 +185,8 @@ function TNN:init(batch_size, chunk_size) for i = 1, #self.dim_in do --Init storage for input ports local ref = self.inputs_p[i].ref local p = self.inputs_p[i].port - self.makeInitialStore(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0) - self.makeInitialStore(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0) + self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0) + self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0) end for id, ref in pairs(self.layers) do --Calling init for child layers @@ -285,13 +285,13 @@ end --reader: some reader --Returns: bool, whether has new feed --Returns: feeds, a table that will be filled with the reader's feeds -function TNN:getFeedFromReader(reader) +function TNN:getfeed_from_reader(reader) local feeds_now = self.feeds_now local got_new = reader:get_batch(feeds_now) return got_new, feeds_now end -function TNN:moveRightToNextMB() --move output history activations of 1..chunk_size to 1-chunk_size..0 +function TNN:move_right_to_nextmb() --move output history activations of 1..chunk_size to 1-chunk_size..0 for t = 1, self.chunk_size, 1 do for id, ref in pairs(self.layers) do for p = 1, #ref.dim_out do @@ -345,7 +345,7 @@ end --ref: the TNN_ref of a layer --t: the current time to propagate function TNN:propagate_dfs(ref, t) - if (self:outOfFeedRange(t)) then + if (self:out_of_feedrange(t)) then return end if (ref.outputs_b[t][1] == true) then --already propagated, 1 is just a random port @@ -357,7 +357,7 @@ function TNN:propagate_dfs(ref, t) local flag = true --whether have all inputs for _, conn in pairs(ref.i_conns_p) do local p = conn.dst.port - if (not (ref.inputs_b[t][p] or self:outOfFeedRange(t - conn.time))) then + if (not (ref.inputs_b[t][p] or self:out_of_feedrange(t - conn.time))) then flag = false break end @@ -465,7 +465,7 @@ end --ref: the TNN_ref of a layer --t: the current time to propagate function TNN:backpropagate_dfs(ref, t, do_update) - if (self:outOfFeedRange(t)) then + if (self:out_of_feedrange(t)) then return end if (ref.err_outputs_b[t][1] == true) then --already back_propagated, 1 is just a random port @@ -477,7 +477,7 @@ function TNN:backpropagate_dfs(ref, t, do_update) local flag = true --whether have all inputs for _, conn in pairs(ref.o_conns_p) do local p = conn.src.port - if (not (ref.err_inputs_b[t][p] or self:outOfFeedRange(t + conn.time))) then + if (not (ref.err_inputs_b[t][p] or self:out_of_feedrange(t + conn.time))) then flag = false break end diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua index f68311c..c37b217 100644 --- a/nerv/examples/lmptb/tnn_ptb_main.lua +++ b/nerv/examples/lmptb/tnn_ptb_main.lua @@ -17,7 +17,7 @@ local LMTrainer = nerv.LMTrainer function prepare_parameters(global_conf, iter) printf("%s preparing parameters...\n", global_conf.sche_log_pre) - if (iter == -1) then --first time + if iter == -1 then --first time printf("%s first time, generating parameters...\n", global_conf.sche_log_pre) ltp_ih = nerv.LinearTransParam("ltp_ih", global_conf) ltp_ih.trans = global_conf.cumat_type(global_conf.vocab:size(), global_conf.hidden_size) --index 0 is for zero, others correspond to vocab index(starting from 1) @@ -290,9 +290,11 @@ printf("%s building vocab...\n", global_conf.sche_log_pre) global_conf.vocab:build_file(global_conf.vocab_fn, false) ppl_rec = {} -if (start_iter == -1) then +if start_iter == -1 then prepare_parameters(global_conf, -1) --randomly generate parameters +end +if start_iter == -1 or start_iter == 0 then print("===INITIAL VALIDATION===") local tnn, paramRepo = load_net(global_conf, 0) local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update! @@ -309,9 +311,6 @@ if (start_iter == -1) then print() end -if (start_iter == 0) then - nerv.error("start_iter should not be zero") -end local final_iter for iter = start_iter, global_conf.max_iter, 1 do final_iter = iter --for final testing @@ -335,21 +334,17 @@ for iter = start_iter, global_conf.max_iter, 1 do if ((ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) and iter > global_conf.decay_iter) then global_conf.lrate = (global_conf.lrate * 0.6) end - if (ppl_rec[iter].valid < ppl_last) then + if ppl_rec[iter].valid < ppl_last then printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter) paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil) else printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre) os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter)) - --if (lr_half == true) then - -- printf("%s LR is already halfing, end training...\n", global_conf.sche_log_pre) - -- break - --end end - if (ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) then + if ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true then lr_half = true end - if (ppl_rec[iter].valid < ppl_last) then + if ppl_rec[iter].valid < ppl_last then ppl_last = ppl_rec[iter].valid end printf("\n") |