cleaning files...

author: txh18 <[email protected]> 2015-11-12 17:13:35 +0800
committer: txh18 <[email protected]> 2015-11-12 17:13:35 +0800
commit: c56722702c099a6d4b3ea1599836e6226bdccc46 (patch)
tree: d6d7c741e69ac1c33d34c3da691da7def416012e /nerv
parent: ae2bb39ec6ea46a8bdfbd3b8b145ecfb7ca9032f (diff)
4 files changed, 127 insertions, 124 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
new file mode 100644
index 0000000..d34634c
--- /dev/null
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -0,0 +1,89 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+require 'lmptb.layer.init'
+require 'rnn.init'
+require 'lmptb.lmseqreader'
+
+local LMTrainer = nerv.class('nerv.LMTrainer')
+
+local printf = nerv.printf
+
+--Runs tnn over the text file fn one mini-batch at a time, back-propagating only when do_train == true. Returns: LMResult
+function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train) --reads batch_size, chunk_size, vocab, log_w_num and sche_log_pre from global_conf
+    local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
+    reader:open_file(fn)
+    local result = nerv.LMResult(global_conf, global_conf.vocab)
+    result:init("rnn")
+    
+    tnn:flush_all() --caution: will also flush the inputs from the reader!
+
+    local next_log_wcn = global_conf.log_w_num --word-count threshold at which the next progress report is printed
+
+    while (1) do
+        local r, feeds
+
+        r, feeds = tnn:getFeedFromReader(reader) --the loop ends as soon as this returns r == false
+        if (r == false) then break end
+        --fill err_inputs_m[t][1] with 1, then zero entry [i - 1][0] for every slot whose flag lacks HAS_LABEL
+        for t = 1, global_conf.chunk_size do
+            tnn.err_inputs_m[t][1]:fill(1)
+            for i = 1, global_conf.batch_size do
+                if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
+                    tnn.err_inputs_m[t][1][i - 1][0] = 0 --NOTE(review): presumably the matrix is indexed from 0, hence i - 1; confirm
+                end
+            end
+        end
+
+        --[[
+        for j = 1, global_conf.chunk_size, 1 do
+            for i = 1, global_conf.batch_size, 1 do
+                printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i])   --vocab:get_word_str(input[i][j]).id
+            end
+            printf("\n")
+        end
+        printf("\n")
+        ]]--
+
+        tnn:net_propagate()
+ 
+        if (do_train == true) then
+            tnn:net_backpropagate(false) --NOTE(review): the flag is presumably do_update (cf. TNN:backpropagate_dfs(ref, t, do_update)); confirm
+            tnn:net_backpropagate(true) --second pass, this time with the flag set to true
+        end
+ 
+        for t = 1, global_conf.chunk_size, 1 do
+            for i = 1, global_conf.batch_size, 1 do
+                if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then --slots labelled with the vocab null token are not scored
+                    result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0])) --NOTE(review): exp() suggests the net outputs a log-probability; confirm
+                end
+            end            
+        end
+        if (result["rnn"].cn_w > next_log_wcn) then --print a progress report each time cn_w passes another multiple of log_w_num
+            next_log_wcn = next_log_wcn + global_conf.log_w_num
+            printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date()) 
+            printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
+            nerv.LMUtil.wait(0.1)
+        end
+         
+        --[[
+        for t = 1, global_conf.chunk_size do
+            print(tnn.outputs_m[t][1])
+        end
+        ]]--
+
+        tnn:moveRightToNextMB()
+
+        collectgarbage("collect") --run a full garbage-collection cycle after every mini-batch
+
+        --break --debug
+    end
+    
+    printf("%s Displaying result:\n", global_conf.sche_log_pre)
+    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+    printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
+    
+    return result
+end
+
+
diff --git a/nerv/examples/lmptb/rnn/tnn.lua b/nerv/examples/lmptb/rnn/tnn.lua
index fc5321d..9850fe5 100644
--- a/nerv/examples/lmptb/rnn/tnn.lua
+++ b/nerv/examples/lmptb/rnn/tnn.lua
@@ -1,5 +1,4 @@
 local TNN = nerv.class("nerv.TNN", "nerv.Layer")
-local DAGLayer = TNN
 
 local function parse_id(str)
     --used to parse layerid[portid],time
@@ -91,7 +90,7 @@ end
 
 function TNN:__init(id, global_conf, layer_conf)
     local layers = {}
-    local inputs_p = {} --map:port of the TDAGLayer to layer ref and port
+    local inputs_p = {} --map:port of the TNN to layer ref and port
     local outputs_p = {}
     local dim_in = layer_conf.dim_in
     local dim_out = layer_conf.dim_out
@@ -394,7 +393,6 @@ function TNN:propagate_dfs(ref, t)
             if (seq_start > 0 or seq_end > 0) then
                 for p, conn in pairs(ref.o_conns_p) do
                     if ((ref.o_conns_p[p].time > 0 and seq_end > 0) or (ref.o_conns_p[p].time < 0 and seq_start > 0)) then 
-                        self.gconf.fz2 = self.gconf.fz2 + 1
                         ref.outputs_m[t][p][i - 1]:fill(self.gconf.nn_act_default)                       
                     end
                 end
@@ -502,7 +500,6 @@ function TNN:backpropagate_dfs(ref, t, do_update)
             if (seq_start > 0 or seq_end > 0) then
                 for p, conn in pairs(ref.i_conns_p) do
                     if ((ref.i_conns_p[p].time > 0 and seq_start > 0) or (ref.i_conns_p[p].time < 0 and seq_end > 0)) then --cross-border, set to zero
-                        self.gconf.fz = self.gconf.fz + 1
                         ref.err_outputs_m[t][p][i - 1]:fill(0)
                     end
                 end
@@ -534,29 +531,3 @@ function TNN:get_params()
     return nerv.ParamRepo.merge(param_repos)
 end
 
-DAGLayer.PORT_TYPES = {
-    INPUT = {},
-    OUTPUT = {},
-    ERR_INPUT = {},
-    ERR_OUTPUT = {}
-}
-
-function DAGLayer:get_intermediate(id, port_type)
-    if id == "<input>" or id == "<output>" then
-        nerv.error("an actual real layer id is expected")
-    end
-    local layer = self.layers[id]
-    if layer == nil then
-        nerv.error("layer id %s not found", id)
-    end
-    if port_type == DAGLayer.PORT_TYPES.INPUT then
-        return layer.inputs
-    elseif port_type == DAGLayer.PORT_TYPES.OUTPUT then
-        return layer.outputs
-    elseif port_type == DAGLayer.PORT_TYPES.ERR_INPUT then
-        return layer.err_inputs
-    elseif port_type == DAGLayer.PORT_TYPES.ERR_OUTPUT then
-        return layer.err_outputs
-    end
-    nerv.error("unrecognized port type")
-end
diff --git a/nerv/examples/lmptb/m-tests/tnn_test.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index e2c0d39..803ae68 100644
--- a/nerv/examples/lmptb/m-tests/tnn_test.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -4,9 +4,11 @@ require 'lmptb.lmutil'
 require 'lmptb.layer.init'
 require 'rnn.init'
 require 'lmptb.lmseqreader'
+require 'lm_trainer'
 
 --[[global function rename]]--
-printf = nerv.printf
+local printf = nerv.printf
+local LMTrainer = nerv.LMTrainer
 --[[global function rename ends]]--
 
 --global_conf: table
@@ -144,89 +146,6 @@ function load_net(global_conf)
     return tnn, paramRepo
 end
 
---Returns: LMResult
-function lm_process_file(global_conf, fn, tnn, do_train)
-    local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
-    reader:open_file(fn)
-    local result = nerv.LMResult(global_conf, global_conf.vocab)
-    result:init("rnn")
-    
-    tnn:flush_all() --caution: will also flush the inputs from the reader!
-
-    local next_log_wcn = global_conf.log_w_num
-
-    global_conf.fz = 0
-    global_conf.fz2 = 0
-
-    while (1) do
-        local r, feeds
-
-        r, feeds = tnn:getFeedFromReader(reader)
-        if (r == false) then break end
-    
-        for t = 1, global_conf.chunk_size do
-            tnn.err_inputs_m[t][1]:fill(1)
-            for i = 1, global_conf.batch_size do
-                if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
-                    tnn.err_inputs_m[t][1][i - 1][0] = 0
-                end
-            end
-        end
-
-        --[[
-        for j = 1, global_conf.chunk_size, 1 do
-            for i = 1, global_conf.batch_size, 1 do
-                printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i])   --vocab:get_word_str(input[i][j]).id
-            end
-            printf("\n")
-        end
-        printf("\n")
-        ]]--
-
-        tnn:net_propagate()
- 
-        if (do_train == true) then
-            tnn:net_backpropagate(false) 
-            tnn:net_backpropagate(true)
-        end
- 
-        for t = 1, global_conf.chunk_size, 1 do
-            for i = 1, global_conf.batch_size, 1 do
-                if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
-                    result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
-                end
-            end            
-        end
-        if (result["rnn"].cn_w > next_log_wcn) then
-            next_log_wcn = next_log_wcn + global_conf.log_w_num
-            printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date()) 
-            printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
-            nerv.LMUtil.wait(0.1)
-        end
-         
-        --[[
-        for t = 1, global_conf.chunk_size do
-            print(tnn.outputs_m[t][1])
-        end
-        ]]--
-
-        tnn:moveRightToNextMB()
-
-        collectgarbage("collect")                                              
-
-        --break --debug
-    end
-    
-    print("gconf.fz", global_conf.fz)
-    print("gconf.fz2", global_conf.fz2)
-
-    printf("%s Displaying result:\n", global_conf.sche_log_pre)
-    printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
-    printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
-    
-    return result
-end
-
 local train_fn, valid_fn, test_fn, global_conf
 local set = arg[1] --"test"
 
@@ -290,6 +209,12 @@ global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
 
+printf("%s printing global_conf\n", global_conf.sche_log_pre) --dump every setting so the run log records the configuration in use
+for id, value in pairs(global_conf) do --pairs() iterates in unspecified order, so the listing order may differ between runs
+    print(id, value)
+end
+nerv.LMUtil.wait(2) --NOTE(review): presumably pauses so the dump can be read before the run continues; confirm in lmutil
+
 printf("%s creating work_dir...\n", global_conf.sche_log_pre)
 os.execute("mkdir -p "..global_conf.work_dir)
 os.execute("cp " .. global_conf.train_fn .. " " .. global_conf.train_fn_shuf)
@@ -302,11 +227,15 @@ prepare_parameters(global_conf, true) --randomly generate parameters
 
 print("===INITIAL VALIDATION===") 
 local tnn, paramRepo = load_net(global_conf)
-local result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
 nerv.LMUtil.wait(3)
 ppl_rec = {} 
 lr_rec = {}
-ppl_rec[0] = result:ppl_net("rnn")  ppl_last = ppl_rec[0]
+ppl_rec[0] = {} --entry 0 records the initial validation pass, run before any training iteration
+ppl_rec[0].valid = result:ppl_net("rnn")  
+ppl_last = ppl_rec[0].valid --best validation PPL so far; later compared against each iteration's validation PPL
+ppl_rec[0].train = 0 --no train/test pass has been run yet; 0 is a placeholder so the final report can print all three fields
+ppl_rec[0].test = 0
 lr_rec[0] = 0 
 print() 
 local lr_half = false 
@@ -314,33 +243,42 @@ for iter = 1, global_conf.max_iter, 1 do
     tnn, paramRepo = load_net(global_conf) 
     printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate) 
     global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:" 
-    lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+    result = LMTrainer.lm_process_file(global_conf, global_conf.train_fn_shuf, tnn, true) --true update!
+    ppl_rec[iter] = {}
+    ppl_rec[iter].train = result:ppl_net("rnn")
     --shuffling training file
+    printf("%s shuffling training file\n", global_conf.sche_log_pre)
     os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak)
     os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf)
+    printf("===PEEK ON TEST %d===\n", iter) --evaluate on the test file without training (do_train is false)
+    result = LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+    ppl_rec[iter].test = result:ppl_net("rnn")  --recorded for the final report only; the LR and saving decisions below read .valid
     printf("===VALIDATION %d===\n", iter) 
-    result = lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
-    ppl_rec[iter] = result:ppl_net("rnn") 
+    result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
+    ppl_rec[iter].valid = result:ppl_net("rnn") 
     lr_rec[iter] = global_conf.lrate 
-    if (ppl_last / ppl_rec[iter] < 1.0003 or lr_half == true) then 
+    if (ppl_last / ppl_rec[iter].valid < 1.0003 or lr_half == true) then 
         global_conf.lrate = (global_conf.lrate * 0.6)
         lr_half = true 
     end 
-    if (ppl_rec[iter] < ppl_last) then 
+    if (ppl_rec[iter].valid < ppl_last) then 
         printf("%s saving net to file %s...\n", global_conf.sche_log_pre, global_conf.param_fn) 
         paramRepo:export(global_conf.param_fn, nil) 
-        ppl_last = ppl_rec[iter] 
+        ppl_last = ppl_rec[iter].valid
     else 
         printf("%s PPL did not improve, rejected...\n", global_conf.sche_log_pre) 
+        if (lr_half == true) then --fixed typo: this read lr_halg, an undefined global that is always nil, so the branch could never run
+            printf("%s LR is already halfing, end training...\n", global_conf.sche_log_pre) --NOTE(review): this only logs; the iteration loop is not exited here
+        end
     end 
     printf("\n") 
     nerv.LMUtil.wait(2) 
 end
 printf("===VALIDATION PPL record===\n") 
-for i = 0, #ppl_rec do printf("<ITER%d LR%.5f: %.3f> ", i, lr_rec[i], ppl_rec[i]) end 
+for i = 0, #ppl_rec do printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, lr_rec[i], ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test) end 
 printf("\n") 
 printf("===FINAL TEST===\n") 
 global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" 
 tnn, paramRepo = load_net(global_conf) 
-lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
+LMTrainer.lm_process_file(global_conf, global_conf.test_fn, tnn, false) --false update!
 
diff --git a/nerv/examples/lmptb/main.lua b/nerv/examples/lmptb/unfold_ptb_main.lua
index a93c148..6c4ead3 100644
--- a/nerv/examples/lmptb/main.lua
+++ b/nerv/examples/lmptb/unfold_ptb_main.lua
@@ -1,3 +1,8 @@
+--author: txh18(Tianxing)
+--This recipe is an RNNLM trained with BPTT, unfolded for each time instance
+--The training framework is the same as in Mikolov's rnnlm, Tianxing's XRNN-CPU and Wengong's XRNN-GPU
+--It uses DAGLayer to simulate the unfolded RNNLM
+
 --TODO: the select_linear now accepts a column vector, instead of a row vector
 
 require 'lmptb.lmvocab'
author	txh18 <[email protected]>	2015-11-12 17:13:35 +0800
committer	txh18 <[email protected]>	2015-11-12 17:13:35 +0800
commit	c56722702c099a6d4b3ea1599836e6226bdccc46 (patch)
tree	d6d7c741e69ac1c33d34c3da691da7def416012e /nerv
parent	ae2bb39ec6ea46a8bdfbd3b8b145ecfb7ca9032f (diff)