author    txh18 <[email protected]>  2015-11-30 11:05:24 +0800
committer txh18 <[email protected]>  2015-11-30 11:05:24 +0800
commit    3171a7a1e404cc82857892d0c212824cf74ce2df (patch)
tree      11800ef6c627371a847ec95080d30c65e81e0c25
parent    d17e801f3de1e31cd06b181f5f0acb7adb2ad08c (diff)
added outputGate for lstm_t
-rw-r--r--  nerv/examples/lmptb/lstmlm_ptb_main.lua       6
-rw-r--r--  nerv/examples/lmptb/tnn/layersT/lstm_t.lua   19
2 files changed, 18 insertions, 7 deletions
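
The change, in brief: LSTMLayerT previously emitted tanh(c_t) directly as its hidden output; this commit wires in an output gate so that h_t = o_t .* tanh(c_t), matching the standard LSTM formulation, and retunes the PTB training configuration (learning rate, gradient clip, hidden size, logging interval) alongside it.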
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 69f26f5..7ec583d 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -184,12 +184,12 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
vocab_fn = data_dir .. '/vocab'
global_conf = {
- lrate = 0.001, wcost = 1e-6, momentum = 0, clip_t = 0.01,
+ lrate = 0.1, wcost = 1e-6, momentum = 0, clip_t = 10,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
- hidden_size = 200, --set to 400 for a stable good test PPL
+ hidden_size = 300, --set to 400 for a stable good test PPL
chunk_size = 15,
batch_size = 10,
max_iter = 35,
@@ -201,7 +201,7 @@ global_conf = {
test_fn = test_fn,
vocab_fn = vocab_fn,
sche_log_pre = "[SCHEDULER]:",
- log_w_num = 400, --give a message when log_w_num words have been processed
+ log_w_num = 40000, --give a message when log_w_num words have been processed
timer = nerv.Timer(),
work_dir_base = '/home/slhome/txh18/workspace/nerv/play/ptbEXP/tnn_lstm_test'
}
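
The config changes raise the learning rate from 0.001 to 0.1, relax the gradient clip from 0.01 to 10, grow the hidden layer from 200 to 300 units, and log every 40000 words instead of every 400. As a minimal sketch of what clip_t presumably does (the exact nerv semantics are not shown in this diff; this assumes an elementwise hard bound on gradients, and clip_gradient is a hypothetical helper, not a nerv function):

-- Hypothetical sketch: bound each gradient entry to [-clip_t, clip_t],
-- assuming that is how global_conf.clip_t is applied internally.
local function clip_gradient(grad, clip_t)
    for i = 1, #grad do
        if grad[i] > clip_t then
            grad[i] = clip_t
        elseif grad[i] < -clip_t then
            grad[i] = -clip_t
        end
    end
    return grad
end

print(table.concat(clip_gradient({12.5, -0.3, -42.0}, 10), " "))
-- prints: 10 -0.3 -10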
diff --git a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
index 409c617..0bd9c76 100644
--- a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
+++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
@@ -20,12 +20,12 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
local layers = {
["nerv.CombinerLayer"] = {
[ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
- ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+ ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
[ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
- ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+ ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
[ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
- [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]},
+ [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]},
["lambda"] = {1, 1}}},
},
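
The *Dup combiners broadcast a single input to several consumers; each gains one output here because the new output gate also reads x_t (inputXDup), h_(t-1) (inputHDup), and the updated cell state (mainCDup). A minimal sketch of the CombinerLayer behavior assumed here, in plain Lua rather than the nerv API (combiner is a hypothetical stand-in; lambda weights the summed inputs, and the sum is copied to every output):

-- Hypothetical combiner: weighted sum of inputs, copied to n_out outputs.
local function combiner(inputs, lambda, n_out)
    local sum = {}
    for i = 1, #inputs[1] do
        sum[i] = 0
        for k = 1, #inputs do
            sum[i] = sum[i] + lambda[k] * inputs[k][i]
        end
    end
    local outputs = {}
    for o = 1, n_out do
        local copy = {}
        for i = 1, #sum do copy[i] = sum[i] end
        outputs[o] = copy
    end
    return outputs
end

local outs = combiner({{1, 2, 3}}, {1}, 4)  -- x_t fanned out to 4 consumers
print(#outs, table.concat(outs[1], " "))    -- prints: 4    1 2 3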
["nerv.AffineLayer"] = {
@@ -41,10 +41,14 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
[ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+ [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+ ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+
},
["nerv.ElemMulLayer"] = {
[ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
[ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
+ [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
},
}
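
outputGateL mirrors the existing input and forget gates: an AffineLayer over {x_t, h_(t-1), c_t} sharing the parameter repo pr, with outputGMulL as the elementwise product that applies it. A hedged sketch of the gate arithmetic this stands for, in plain Lua with hypothetical names (the sigmoid squashing is assumed; the diff itself only shows the affine layer and its wiring):

-- Hypothetical gate: affine map over {x, h, c} followed by a sigmoid.
-- Plain Lua tables stand in for nerv's CuMatrix types.
local function sigmoid(z) return 1 / (1 + math.exp(-z)) end

local function gate(x, h, c, Wx, Wh, Wc, b)
    local o = {}
    for j = 1, #b do
        local z = b[j]
        for i = 1, #x do z = z + Wx[i][j] * x[i] end
        for i = 1, #h do z = z + Wh[i][j] * h[i] end
        for i = 1, #c do z = z + Wc[i][j] * c[i] end
        o[j] = sigmoid(z)
    end
    return o
end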
@@ -77,10 +81,17 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
[ap("inputGMulL[1]")] = ap("mainCDup[1]"),
[ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
+ [ap("inputXDup[4]")] = ap("outputGateL[1]"),
+ [ap("inputHDup[4]")] = ap("outputGateL[2]"),
+ [ap("mainCDup[3]")] = ap("outputGateL[3]"),
+
[ap("mainCDup[2]")] = "<output>[2]",
[ap("mainCDup[1]")] = ap("outputTanhL[1]"),
+
+ [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
+ [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
- [ap("outputTanhL[1]")] = "<output>[1]",
+ [ap("outputGMulL[1]")] = "<output>[1]",
}
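
With these connections, outputTanhL no longer feeds <output>[1] directly: its tanh(c_t) now passes through outputGMulL, where it is multiplied elementwise by the gate activation, so the layer emits h_t = o_t .* tanh(c_t). A minimal sketch of the rewired output path (output_path and elem_mul are hypothetical helpers; plain Lua tables stand in for nerv matrices):

-- Elementwise product, as computed by the new outputGMulL.
local function elem_mul(a, b)
    local r = {}
    for i = 1, #a do r[i] = a[i] * b[i] end
    return r
end

-- The rewired output path: tanh of the new cell state, gated.
local function output_path(c_new, o_gate)
    local t = {}
    for i = 1, #c_new do t[i] = math.tanh(c_new[i]) end
    return elem_mul(t, o_gate)
end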
self.dagL = nerv.DAGLayerT(self.id, global_conf,
{["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,