summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/examples/lmptb/lstmlm_ptb_main.lua 13
-rw-r--r-- nerv/tnn/layersT/lstm_t.lua 11
2 files changed, 8 insertions, 16 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index d5408ba..a2dcbba 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -77,10 +77,6 @@ function prepare_layers(global_conf)
--local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
local layers = {
- --["nerv.AffineRecurrentLayer"] = {
- -- ["recurrentL1"] = recurrentLconfig,
- --},
-
["nerv.LSTMLayerT"] = {
["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}},
},
@@ -93,10 +89,6 @@ function prepare_layers(global_conf)
["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
},
- --["nerv.SigmoidLayer"] = {
- -- ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
- --},
-
["nerv.CombinerLayer"] = {
["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}},
},
@@ -214,7 +206,7 @@ global_conf = {
layer_num = 1,
chunk_size = 15,
batch_size = 20,
- max_iter = 45,
+ max_iter = 35,
lr_decay = 1.003,
decay_iter = 10,
param_random = function() return (math.random() / 5 - 0.1) end,
@@ -323,7 +315,8 @@ global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str)
-global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%X_%m_%d",os.time())
+global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time())
+global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-')
commands = nerv.SUtil.parse_commands_set(commands_str)
nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir)
diff --git a/nerv/tnn/layersT/lstm_t.lua b/nerv/tnn/layersT/lstm_t.lua
index 2a3342d..04d0600 100644
--- a/nerv/tnn/layersT/lstm_t.lua
+++ b/nerv/tnn/layersT/lstm_t.lua
@@ -24,12 +24,12 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
[ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
[ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
- ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
+ ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
[ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]},
["lambda"] = {1, 1}}},
},
["nerv.AffineLayer"] = {
- [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+ [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
},
["nerv.TanhLayer"] = {
@@ -61,21 +61,20 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
[ap("inputXDup[1]")] = ap("mainAffineL[1]"),
[ap("inputHDup[1]")] = ap("mainAffineL[2]"),
- [ap("inputCDup[1]")] = ap("mainAffineL[3]"),
[ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
[ap("inputXDup[2]")] = ap("inputGateL[1]"),
[ap("inputHDup[2]")] = ap("inputGateL[2]"),
- [ap("inputCDup[2]")] = ap("inputGateL[3]"),
+ [ap("inputCDup[1]")] = ap("inputGateL[3]"),
[ap("inputXDup[3]")] = ap("forgetGateL[1]"),
[ap("inputHDup[3]")] = ap("forgetGateL[2]"),
- [ap("inputCDup[3]")] = ap("forgetGateL[3]"),
+ [ap("inputCDup[2]")] = ap("forgetGateL[3]"),
[ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
[ap("inputGateL[1]")] = ap("inputGMulL[2]"),
- [ap("inputCDup[4]")] = ap("forgetGMulL[1]"),
+ [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
[ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
[ap("inputGMulL[1]")] = ap("mainCDup[1]"),