From 03a5ad963ee381eaee1de24d1def52bba9b71736 Mon Sep 17 00:00:00 2001
From: txh18
Date: Mon, 16 Nov 2015 20:14:02 +0800
Subject: unified param updates, now direct_update is the same speed with
 undirect_update

---
 nerv/layer/affine.lua | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

(limited to 'nerv/layer/affine.lua')

diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 0fcff36..c24af16 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -19,14 +19,33 @@ end
 
 function MatrixParam:update(gradient)
     local gconf = self.gconf
-    self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
-    -- momentum gain
-    local mmt_gain = 1.0 / (1.0 - gconf.momentum);
-    local n = self.gconf.batch_size * mmt_gain
-    -- perform update
-    self.trans:add(self.trans, self.correction, 1.0 - gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / n)
+    if (gconf.momentum > 0) then
+        self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+        -- momentum gain
+        local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+        local n = self.gconf.batch_size * mmt_gain
+        -- perform update
+        self.trans:add(self.trans, self.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n)
+    else
+        self.trans:add(self.trans, gradient, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/gconf.batch_size)
+    end
+end
+
+function MatrixParam:updateEI(err, input)
+    local gconf = self.gconf
+    if (gconf.momentum > 0) then
+        self.correction:mul(input, err, 1.0, gconf.momentum, 'T', 'N')
+        -- momentum gain
+        local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+        local n = self.gconf.batch_size * mmt_gain
+        -- perform update
+        self.trans:add(self.trans, self.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n)
+    else
+        self.trans:mul(input, err, -gconf.lrate/gconf.batch_size, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, 'T', 'N')
+    end
 end
 
+--[[ --these updates are the same
 function LinearTransParam:update(gradient)
     MatrixParam.update(self, gradient)
     -- local gconf = self.gconf
@@ -36,10 +55,11 @@ end
 
 function BiasParam:update(gradient)
     MatrixParam.update(self, gradient)
-    -- local gconf = self.gconf
+    --local gconf = self.gconf
     -- weight decay
     -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
 end
+]]--
 
 function AffineLayer:__init(id, global_conf, layer_conf)
     self.id = id
@@ -88,8 +108,7 @@ function AffineLayer:update(bp_err, input, output)
             self.bp.trans:add(self.bp.trans, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / gconf.batch_size)
         end
     else
-        self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
-        self.ltp:update(self.ltp_grad)
+        self.ltp:updateEI(bp_err[1], input[1])
         self.bp:update(bp_err[1]:colsum())
     end
 end
--
cgit v1.2.3
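
For reference, the update rule that both MatrixParam:update and the new MatrixParam:updateEI apply (weight decay, optional momentum with the 1/(1 - momentum) gain, and the 1/batch_size scaling) can be written out with plain Lua tables standing in for NERV's matrix objects. This is a minimal sketch of the arithmetic only, under the assumption that trans:add/trans:mul scale and accumulate as the calls in the patch suggest; sgd_update and the toy vectors are illustrative names, not part of the nerv API, while the gconf fields (lrate, wcost, momentum, batch_size) mirror those used in the diff.

-- Toy illustration, not NERV code: the parameter update from the patch,
-- element-wise on plain Lua tables instead of CuMatrix objects.
local function sgd_update(w, grad, correction, gconf)
    -- weight-decay factor applied to the existing parameters
    local decay = 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size
    if gconf.momentum > 0 then
        -- momentum accumulation: correction = momentum * correction + grad
        for i, g in ipairs(grad) do
            correction[i] = gconf.momentum * correction[i] + g
        end
        -- momentum gain: n = batch_size / (1 - momentum)
        local n = gconf.batch_size / (1.0 - gconf.momentum)
        for i, c in ipairs(correction) do
            w[i] = decay * w[i] - (gconf.lrate / n) * c
        end
    else
        -- plain SGD with weight decay, no correction buffer needed
        for i, g in ipairs(grad) do
            w[i] = decay * w[i] - (gconf.lrate / gconf.batch_size) * g
        end
    end
end

-- example: one update step on a 3-element "parameter matrix"
local w = {0.1, -0.2, 0.3}
local grad = {0.5, 0.5, 0.5}
local correction = {0.0, 0.0, 0.0}
sgd_update(w, grad, correction,
           {lrate = 0.1, wcost = 1e-4, momentum = 0.9, batch_size = 256})
print(w[1], w[2], w[3])

The difference between the two methods in the patch is only where the gradient comes from: update(gradient) is handed a precomputed gradient matrix, while updateEI(err, input) forms input^T * err itself (the :mul call with 'T', 'N') and folds it straight into the correction buffer or the weights. That is what lets AffineLayer:update drop the intermediate self.ltp_grad buffer in the non-direct path, which the subject line credits for making the two paths equally fast.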