about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/examples/lmptb/tnn_ptb_main.lua | 2
-rw-r--r-- nerv/layer/affine_recurrent.lua | 4
2 files changed, 3 insertions, 3 deletions
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index 19d0f8a..491d4b7 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -165,7 +165,7 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
vocab_fn = data_dir .. '/vocab'
global_conf = {
- lrate = 1, wcost = 1e-5, momentum = 0,
+ lrate = 1, wcost = 1e-5, momentum = 0.9,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
diff --git a/nerv/layer/affine_recurrent.lua b/nerv/layer/affine_recurrent.lua
index 3d448d3..c8cd382 100644
--- a/nerv/layer/affine_recurrent.lua
+++ b/nerv/layer/affine_recurrent.lua
@@ -54,8 +54,8 @@ function Recurrent:update(bp_err, input, output)
self.ltp_hh.correction:mul(input[2], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
self.bp.correction:add(self.bp.correction, bp_err[1]:colsum(), gconf.momentum, 1.0)
-- perform update and weight decay
- ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0 - gconf.lrate.gconf.wcost/n, -gconf.lrate / n)
- bp:add(bp, self.bp.correction, 1.0 - gconf.lrate*gconf.wcost/n, -gconf.lrate / n)
+ ltp_hh:add(ltp_hh, self.ltp_hh.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n)
+ bp:add(bp, self.bp.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n)
else
ltp_hh:mul(input[2], bp_err[1], -gconf.lrate/gconf.batch_size, 1.0-gconf.wcost*gconf.lrate/gconf.batch_size, 'T', 'N')
bp:add(bp, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/gconf.batch_size)