diff options
-rw-r--r-- | nerv/examples/lmptb/lstmlm_ptb_main.lua | 13 | ||||
-rw-r--r-- | nerv/tnn/layersT/lstm_t.lua | 11 |
2 files changed, 8 insertions, 16 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua index d5408ba..a2dcbba 100644 --- a/nerv/examples/lmptb/lstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua @@ -77,10 +77,6 @@ function prepare_layers(global_conf) --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}} local layers = { - --["nerv.AffineRecurrentLayer"] = { - -- ["recurrentL1"] = recurrentLconfig, - --}, - ["nerv.LSTMLayerT"] = { ["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}, }, @@ -93,10 +89,6 @@ function prepare_layers(global_conf) ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}}, }, - --["nerv.SigmoidLayer"] = { - -- ["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} - --}, - ["nerv.CombinerLayer"] = { ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, }, @@ -214,7 +206,7 @@ global_conf = { layer_num = 1, chunk_size = 15, batch_size = 20, - max_iter = 45, + max_iter = 35, lr_decay = 1.003, decay_iter = 10, param_random = function() return (math.random() / 5 - 0.1) end, @@ -323,7 +315,8 @@ global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf' global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak' global_conf.param_fn = global_conf.work_dir .. "/params" global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str) -global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%X_%m_%d",os.time()) +global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time()) +global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') commands = nerv.SUtil.parse_commands_set(commands_str) nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir) diff --git a/nerv/tnn/layersT/lstm_t.lua b/nerv/tnn/layersT/lstm_t.lua index 2a3342d..04d0600 100644 --- a/nerv/tnn/layersT/lstm_t.lua +++ b/nerv/tnn/layersT/lstm_t.lua @@ -24,12 +24,12 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}}, + ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}}, [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1, 1}}}, }, ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, + [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}}, }, ["nerv.TanhLayer"] = { @@ -61,21 +61,20 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) [ap("inputXDup[1]")] = ap("mainAffineL[1]"), [ap("inputHDup[1]")] = ap("mainAffineL[2]"), - [ap("inputCDup[1]")] = ap("mainAffineL[3]"), [ap("mainAffineL[1]")] = ap("mainTanhL[1]"), [ap("inputXDup[2]")] = ap("inputGateL[1]"), [ap("inputHDup[2]")] = ap("inputGateL[2]"), - [ap("inputCDup[2]")] = ap("inputGateL[3]"), + [ap("inputCDup[1]")] = ap("inputGateL[3]"), [ap("inputXDup[3]")] = ap("forgetGateL[1]"), [ap("inputHDup[3]")] = ap("forgetGateL[2]"), - [ap("inputCDup[3]")] = ap("forgetGateL[3]"), + [ap("inputCDup[2]")] = ap("forgetGateL[3]"), [ap("mainTanhL[1]")] = ap("inputGMulL[1]"), [ap("inputGateL[1]")] = ap("inputGMulL[2]"), - [ap("inputCDup[4]")] = ap("forgetGMulL[1]"), + [ap("inputCDup[3]")] = ap("forgetGMulL[1]"), [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"), [ap("inputGMulL[1]")] = ap("mainCDup[1]"), |