diff options
author | txh18 <[email protected]> | 2015-11-16 20:14:02 +0800 |
---|---|---|
committer | txh18 <[email protected]> | 2015-11-16 20:14:02 +0800 |
commit | 03a5ad963ee381eaee1de24d1def52bba9b71736 (patch) | |
tree | e4caed009f379e74d94dd24c6f07ae0a6632ea8b /nerv/layer/affine.lua | |
parent | a9300a1f6b3a101c5aef712b8f2f6049d4794484 (diff) |
unified param updates; direct_update is now as fast as undirect_update
Diffstat (limited to 'nerv/layer/affine.lua')
-rw-r--r-- | nerv/layer/affine.lua | 37 |
1 files changed, 28 insertions, 9 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 0fcff36..c24af16 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -19,14 +19,33 @@ end function MatrixParam:update(gradient) local gconf = self.gconf - self.correction:add(self.correction, gradient, gconf.momentum, 1.0) - -- momentum gain - local mmt_gain = 1.0 / (1.0 - gconf.momentum); - local n = self.gconf.batch_size * mmt_gain - -- perform update - self.trans:add(self.trans, self.correction, 1.0 - gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / n) + if (gconf.momentum > 0) then + self.correction:add(self.correction, gradient, gconf.momentum, 1.0) + -- momentum gain + local mmt_gain = 1.0 / (1.0 - gconf.momentum); + local n = self.gconf.batch_size * mmt_gain + -- perform update + self.trans:add(self.trans, self.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n) + else + self.trans:add(self.trans, gradient, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/gconf.batch_size) + end +end + +function MatrixParam:updateEI(err, input) + local gconf = self.gconf + if (gconf.momentum > 0) then + self.correction:mul(input, err, 1.0, gconf.momentum, 'T', 'N') + -- momentum gain + local mmt_gain = 1.0 / (1.0 - gconf.momentum); + local n = self.gconf.batch_size * mmt_gain + -- perform update + self.trans:add(self.trans, self.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate/n) + else + self.trans:mul(input, err, -gconf.lrate/gconf.batch_size, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, 'T', 'N') + end end +--[[ --these updates are the same function LinearTransParam:update(gradient) MatrixParam.update(self, gradient) -- local gconf = self.gconf @@ -36,10 +55,11 @@ end function BiasParam:update(gradient) MatrixParam.update(self, gradient) - -- local gconf = self.gconf + --local gconf = self.gconf -- weight decay -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size) end +]]-- function 
AffineLayer:__init(id, global_conf, layer_conf) self.id = id @@ -88,8 +108,7 @@ function AffineLayer:update(bp_err, input, output) self.bp.trans:add(self.bp.trans, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / gconf.batch_size) end else - self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N') - self.ltp:update(self.ltp_grad) + self.ltp:updateEI(bp_err[1], input[1]) self.bp:update(bp_err[1]:colsum()) end end |