author     Determinant <[email protected]>   2015-11-21 19:48:56 +0800
committer  Determinant <[email protected]>   2015-11-21 19:48:56 +0800
commit     d1eb2a18c0adfec52b438eda8602ab2601d12391 (patch)
tree       b90989258ff9dc8a07a568ef6ea4d91ae3d9fb62
parent     369853d0b3f2bd70f5ddce43fa2811adb956333a (diff)
use consistent update calc; clean up code; no need for `direct_update`
-rw-r--r--   nerv/layer/affine.lua   72
1 file changed, 26 insertions(+), 46 deletions(-)
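For reference, every update path in the patched file now reduces to one scaled step, trans <- alpha * trans - (lrate / n) * beta * step, where n = batch_size / (1 - momentum) is the momentum-gain-corrected batch size and step is either the raw gradient or the momentum-accumulated correction. The sketch below restates that rule in plain Lua so it can be read outside NERV; sgd_step and the plain number tables are illustrative stand-ins (the real code operates on NERV matrix objects via add/mul), and gconf simply carries the same fields the file uses.

    -- Minimal sketch of the unified update rule, assuming plain Lua
    -- tables of numbers in place of NERV matrices (hypothetical helper,
    -- not NERV API).
    local function sgd_step(trans, correction, grad, gconf, alpha, beta)
        -- momentum gain: an accumulated step is 1/(1 - momentum) times
        -- larger than a raw gradient step, so scale the batch size up
        local mmt_gain = 1.0 / (1.0 - gconf.momentum)
        local n = gconf.batch_size * mmt_gain
        for i = 1, #trans do
            local step = grad[i]
            if gconf.momentum > 0 then
                -- correction <- momentum * correction + grad
                correction[i] = gconf.momentum * correction[i] + grad[i]
                step = correction[i]
            end
            -- trans <- alpha * trans - (lrate / n) * beta * step
            trans[i] = alpha * trans[i] - gconf.lrate / n * beta * step
        end
    end

The wrappers in the patch pick alpha and beta: MatrixParam:update_by_gradient and MatrixParam:update_by_err_input pass 1.0/1.0 (no weight decay), while LinearTransParam:update_by_err_input passes l2 = 1 - lrate * wcost for both, shrinking the weights and the step by the same L2 factor. Note that when momentum is 0, n reduces to batch_size, so hoisting n out of the momentum branch leaves the non-momentum step unchanged. Two behavioral changes are visible in the diff itself: bias parameters no longer get the old 1.0 - lrate * wcost / batch_size decay factor, and the decay applied to linear weights is no longer divided by batch_size. In the error-input form, mul(input, err, ..., 'T', 'N') builds the weight gradient as input^T * err, so the same alpha/beta scaling carries over unchanged.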
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 6a541e8..02a3536 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -17,49 +17,46 @@ function MatrixParam:train_init()
     self.correction:fill(0)
 end
 
-function MatrixParam:update_by_gradient(gradient)
+function MatrixParam:_update_by_gradient(gradient, alpha, beta)
     local gconf = self.gconf
+    -- momentum gain
+    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
+    local n = self.gconf.batch_size * mmt_gain
+    -- perform update
     if gconf.momentum > 0 then
         self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
-        -- momentum gain
-        local mmt_gain = 1.0 / (1.0 - gconf.momentum)
-        local n = self.gconf.batch_size * mmt_gain
-        -- perform update
-        self.trans:add(self.trans, self.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
+        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
     else
-        self.trans:add(self.trans, gradient, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / gconf.batch_size)
+        self.trans:add(self.trans, gradient, alpha, -gconf.lrate / n * beta)
     end
 end
 
-function MatrixParam:update_by_err_input(err, input)
+function MatrixParam:_update_by_err_input(err, input, alpha, beta)
     local gconf = self.gconf
+    -- momentum gain
+    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
+    local n = self.gconf.batch_size * mmt_gain
+    -- perform update
     if gconf.momentum > 0 then
         self.correction:mul(input, err, 1.0, gconf.momentum, 'T', 'N')
-        -- momentum gain
-        local mmt_gain = 1.0 / (1.0 - gconf.momentum)
-        local n = self.gconf.batch_size * mmt_gain
-        -- perform update
-        self.trans:add(self.trans, self.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
+        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
     else
-        self.trans:mul(input, err, - gconf.lrate / gconf.batch_size, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, 'T', 'N')
+        self.trans:mul(input, err, -gconf.lrate / n * beta, alpha, 'T', 'N')
     end
 end
 
---[[ --these updates are the same
-function LinearTransParam:update(gradient)
-    MatrixParam.update(self, gradient)
-    -- local gconf = self.gconf
-    -- weight decay(put into MatrixParam:update)
-    -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
+function MatrixParam:update_by_gradient(gradient)
+    self:_update_by_gradient(gradient, 1.0, 1.0)
+end
+
+function MatrixParam:update_by_err_input(err, input)
+    self:_update_by_err_input(err, input, 1.0, 1.0)
 end
 
-function BiasParam:update(gradient)
-    MatrixParam.update(self, gradient)
-    --local gconf = self.gconf
-    -- weight decay
-    -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
+function LinearTransParam:update_by_err_input(err, input)
+    local l2 = 1 - gconf.lrate * gconf.wcost
+    self:_update_by_err_input(err, input, l2, l2)
 end
-]]--
 
 function AffineLayer:__init(id, global_conf, layer_conf)
     self.id = id
@@ -69,7 +66,7 @@ function AffineLayer:__init(id, global_conf, layer_conf)
     self.dim_out = layer_conf.dim_out
     self.gconf = global_conf
     self:check_dim_len(1, 1) -- exactly one input and one output
-    self.direct_update = layer_conf.direct_update or global_conf.direct_update
+    -- self.direct_update = layer_conf.direct_update or global_conf.direct_update
 end
 
 function AffineLayer:init(batch_size)
@@ -92,25 +89,8 @@ function AffineLayer:batch_resize(batch_size)
 end
 
 function AffineLayer:update(bp_err, input, output)
-    if self.direct_update == true then
-        local gconf = self.gconf
-        if gconf.momentum > 0 then
-            self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
-            self.bp.correction:add(self.bp.correction, bp_err[1]:colsum(), gconf.momentum, 1)
-            -- momentum gain
-            local mmt_gain = 1.0 / (1.0 - gconf.momentum)
-            local n = self.gconf.batch_size * mmt_gain
-            -- perform update
-            self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
-            self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / n)
-        else
-            self.ltp.trans:mul(input[1], bp_err[1], - gconf.lrate / gconf.batch_size, 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, 'T', 'N')
-            self.bp.trans:add(self.bp.trans, bp_err[1]:colsum(), 1.0 - gconf.lrate * gconf.wcost / gconf.batch_size, - gconf.lrate / gconf.batch_size)
-        end
-    else
-        self.ltp:update_by_err_input(bp_err[1], input[1])
-        self.bp:update_by_gradient(bp_err[1]:colsum())
-    end
+    self.ltp:update_by_err_input(bp_err[1], input[1])
+    self.bp:update_by_gradient(bp_err[1]:colsum())
 end
 
 function AffineLayer:propagate(input, output)
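One thing worth flagging in review: the new LinearTransParam:update_by_err_input reads gconf without first binding local gconf = self.gconf, unlike every other method in the file, so the lookup falls through to the global environment and will raise an error (indexing a nil global) unless something else happens to define a global gconf. A follow-up would presumably look like this sketch (not part of this commit):

    function LinearTransParam:update_by_err_input(err, input)
        local gconf = self.gconf -- binding missing from the patch as committed
        local l2 = 1 - gconf.lrate * gconf.wcost
        self:_update_by_err_input(err, input, l2, l2)
    end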