author | txh18 <[email protected]> | 2015-11-30 11:05:24 +0800
---|---|---
committer | txh18 <[email protected]> | 2015-11-30 11:05:24 +0800
commit | 3171a7a1e404cc82857892d0c212824cf74ce2df (patch) |
tree | 11800ef6c627371a847ec95080d30c65e81e0c25 /nerv |
parent | d17e801f3de1e31cd06b181f5f0acb7adb2ad08c (diff) |
added outputGate for lstm_t
Diffstat (limited to 'nerv')
-rw-r--r-- | nerv/examples/lmptb/lstmlm_ptb_main.lua | 6
-rw-r--r-- | nerv/examples/lmptb/tnn/layersT/lstm_t.lua | 19
2 files changed, 18 insertions, 7 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 69f26f5..7ec583d 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -184,12 +184,12 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
 vocab_fn = data_dir .. '/vocab'
 
 global_conf = {
-    lrate = 0.001, wcost = 1e-6, momentum = 0, clip_t = 0.01,
+    lrate = 0.1, wcost = 1e-6, momentum = 0, clip_t = 10,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,
 
-    hidden_size = 200, --set to 400 for a stable good test PPL
+    hidden_size = 300, --set to 400 for a stable good test PPL
     chunk_size = 15,
     batch_size = 10,
     max_iter = 35,
@@ -201,7 +201,7 @@ global_conf = {
     test_fn = test_fn,
     vocab_fn = vocab_fn,
     sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 400, --give a message when log_w_num words have been processed
+    log_w_num = 40000, --give a message when log_w_num words have been processed
     timer = nerv.Timer(),
     work_dir_base = '/home/slhome/txh18/workspace/nerv/play/ptbEXP/tnn_lstm_test'
 }
diff --git a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
index 409c617..0bd9c76 100644
--- a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
+++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
@@ -20,12 +20,12 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
     local layers = {
         ["nerv.CombinerLayer"] = {
             [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
-                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
             [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
             [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
                 ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
-            [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]},
+            [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]},
                 ["lambda"] = {1, 1}}},
         },
         ["nerv.AffineLayer"] = {
@@ -41,10 +41,14 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
             ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
             [ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
             ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+            [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+            ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+
         },
         ["nerv.ElemMulLayer"] = {
             [ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
             [ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
+            [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
         },
     }
 
@@ -77,10 +81,17 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
         [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
         [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
 
+        [ap("inputXDup[4]")] = ap("outputGateL[1]"),
+        [ap("inputHDup[4]")] = ap("outputGateL[2]"),
+        [ap("mainCDup[3]")] = ap("outputGateL[3]"),
+
         [ap("mainCDup[2]")] = "<output>[2]",
         [ap("mainCDup[1]")] = ap("outputTanhL[1]"),
+
+        [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
+        [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
-        [ap("outputTanhL[1]")] = "<output>[1]",
+        [ap("outputGMulL[1]")] = "<output>[1]",
     }
 
     self.dagL = nerv.DAGLayerT(self.id, global_conf, {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out,
         ["sub_layers"] = layerRepo,