diff options
-rw-r--r-- | nerv/examples/lmptb/lstmlm_ptb_main.lua | 6 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn/layersT/lstm_t.lua | 19 |
2 files changed, 18 insertions, 7 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua index 69f26f5..7ec583d 100644 --- a/nerv/examples/lmptb/lstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua @@ -184,12 +184,12 @@ test_fn = data_dir .. '/ptb.test.txt.adds' vocab_fn = data_dir .. '/vocab' global_conf = { - lrate = 0.001, wcost = 1e-6, momentum = 0, clip_t = 0.01, + lrate = 0.1, wcost = 1e-6, momentum = 0, clip_t = 10, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, nn_act_default = 0, - hidden_size = 200, --set to 400 for a stable good test PPL + hidden_size = 300, --set to 400 for a stable good test PPL chunk_size = 15, batch_size = 10, max_iter = 35, @@ -201,7 +201,7 @@ global_conf = { test_fn = test_fn, vocab_fn = vocab_fn, sche_log_pre = "[SCHEDULER]:", - log_w_num = 400, --give a message when log_w_num words have been processed + log_w_num = 40000, --give a message when log_w_num words have been processed timer = nerv.Timer(), work_dir_base = '/home/slhome/txh18/workspace/nerv/play/ptbEXP/tnn_lstm_test' } diff --git a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua index 409c617..0bd9c76 100644 --- a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua +++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua @@ -20,12 +20,12 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) local layers = { ["nerv.CombinerLayer"] = { [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, - ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, + ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, + ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}}, - [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]}, + [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1, 1}}}, }, ["nerv.AffineLayer"] = { @@ -41,10 +41,14 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, [ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, + [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, + ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, + }, ["nerv.ElemMulLayer"] = { [ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, [ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, + [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, }, } @@ -77,10 +81,17 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) [ap("inputGMulL[1]")] = ap("mainCDup[1]"), [ap("forgetGMulL[1]")] = ap("mainCDup[2]"), + [ap("inputXDup[4]")] = ap("outputGateL[1]"), + [ap("inputHDup[4]")] = ap("outputGateL[2]"), + [ap("mainCDup[3]")] = ap("outputGateL[3]"), + [ap("mainCDup[2]")] = "<output>[2]", [ap("mainCDup[1]")] = ap("outputTanhL[1]"), + + [ap("outputTanhL[1]")] = ap("outputGMulL[1]"), + [ap("outputGateL[1]")] = ap("outputGMulL[2]"), - [ap("outputTanhL[1]")] = "<output>[1]", + [ap("outputGMulL[1]")] = "<output>[1]", } self.dagL = nerv.DAGLayerT(self.id, global_conf, {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, |