diff options
-rw-r--r-- | nerv/examples/lmptb/tnn_ptb_main.lua | 2 | ||||
-rw-r--r-- | nerv/layer/affine_recurrent.lua | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua index 19d0f8a..491d4b7 100644 --- a/nerv/examples/lmptb/tnn_ptb_main.lua +++ b/nerv/examples/lmptb/tnn_ptb_main.lua @@ -165,7 +165,7 @@ test_fn = data_dir .. '/ptb.test.txt.adds' vocab_fn = data_dir .. '/vocab' global_conf = { - lrate = 1, wcost = 1e-5, momentum = 0, + lrate = 1, wcost = 1e-5, momentum = 0.9, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, nn_act_default = 0, diff --git a/nerv/layer/affine_recurrent.lua b/nerv/layer/affine_recurrent.lua index 3d448d3..c8cd382 100644 --- a/nerv/layer/affine_recurrent.lua +++ b/nerv/layer/affine_recurrent.lua @@ -54,8 +54,8 @@ function Recurrent:update(bp_err, input, output) self.ltp_hh.correction:mul(input[2], bp_err[1], 1.0, gconf.momentum, 'T', 'N') self.bp.correction:add(self.bp.correction, bp_err[1]:colsum(), gconf.momentum, 1.0) -- perform update and weight decay - ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0 - gconf.lrate.gconf.wcost/n, -gconf.lrate / n) - bp:add(bp, self.bp.correction, 1.0 - gconf.lrate*gconf.wcost/n, -gconf.lrate / n) + ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n) + bp:add(bp, self.bp.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n) else ltp_hh:mul(input[2], bp_err[1], -gconf.lrate/gconf.batch_size, 1.0-gconf.wcost*gconf.lrate/gconf.batch_size, 'T', 'N') bp:add(bp, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/gconf.batch_size) |