From c589c3aabaae7f3867bdfed994c8179a87f42675 Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Tue, 29 Mar 2016 10:05:29 +0800
Subject: fix bug of momentum & update mse layer

---
 nerv/layer/affine.lua | 43 +++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 38743aa..a05ae17 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -25,7 +25,9 @@ end
 
 function MatrixParam:train_init()
     self.correction = self.trans:create()
+    self.correction_acc = self.correction:create()
     self.correction:fill(0)
+    self.correction_acc:fill(0)
 end
 
 function MatrixParam:copy(copier)
@@ -34,46 +36,37 @@ function MatrixParam:copy(copier)
     return target
 end
 
-function MatrixParam:_update_by_gradient(gradient, alpha, beta)
+function MatrixParam:_update(alpha, beta)
     local gconf = self.gconf
     -- momentum gain
     local mmt_gain = 1.0 / (1.0 - gconf.momentum)
     local n = gconf.batch_size * mmt_gain
     -- perform update
     if gconf.momentum > 0 then
-        self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+        self.correction:add(self.correction, self.correction_acc, gconf.momentum, 1.0)
         self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
     else
-        self.trans:add(self.trans, gradient, alpha, -gconf.lrate / n * beta)
+        self.trans:add(self.trans, self.correction_acc, alpha, -gconf.lrate / n * beta)
     end
+    self.correction_acc:fill(0)
 end
 
-function MatrixParam:_update_by_err_input(err, input, alpha, beta)
-    local gconf = self.gconf
-    -- momentum gain
-    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
-    local n = gconf.batch_size * mmt_gain
-    -- perform update
-    if gconf.momentum > 0 then
-        self.correction:mul(input, err, 1.0, gconf.momentum, 'T', 'N')
-        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
-    else
-        self.trans:mul(input, err, -gconf.lrate / n * beta, alpha, 'T', 'N')
-    end
+function MatrixParam:back_propagate_by_gradient(gradient)
+    self.correction_acc:add(self.correction_acc, gradient, 1.0, 1.0)
 end
 
-function MatrixParam:update_by_gradient(gradient)
-    self:_update_by_gradient(gradient, 1.0, 1.0)
+function MatrixParam:back_propagate_by_err_input(err, input)
+    self.correction_acc:mul(input, err, 1.0, 1.0, 'T', 'N')
 end
 
-function MatrixParam:update_by_err_input(err, input)
-    self:_update_by_err_input(err, input, 1.0, 1.0)
+function MatrixParam:update_by_gradient()
+    self:_update(1.0, 1.0)
 end
 
-function LinearTransParam:update_by_err_input(err, input)
+function MatrixParam:update_by_err_input()
     local gconf = self.gconf
     local l2 = 1 - gconf.lrate * gconf.wcost
-    self:_update_by_err_input(err, input, l2, l2)
+    self:_update(l2, l2)
 end
 
 --- A fully-connected linear transform layer.
@@ -121,11 +114,11 @@ function AffineLayer:batch_resize(batch_size)
     -- do nothing
 end
 
-function AffineLayer:update(bp_err, input, output)
+function AffineLayer:update()
     for i = 1, #self.dim_in do
-        self["ltp" .. i]:update_by_err_input(bp_err[1], input[i])
+        self["ltp" .. i]:update_by_err_input()
     end
-    self.bp:update_by_gradient(bp_err[1]:colsum())
+    self.bp:update_by_gradient()
 end
 
 function AffineLayer:propagate(input, output)
@@ -141,7 +134,9 @@ end
 function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
     for i = 1, #self.dim_in do
         next_bp_err[i]:mul(bp_err[1], self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
+        self["ltp" .. i]:back_propagate_by_err_input(bp_err[1], input[i])
     end
+    self.bp:back_propagate_by_gradient(bp_err[1]:colsum())
 end
 
 function AffineLayer:get_params()
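
Note on the new update flow (not part of the commit): the patch splits gradient handling into an accumulation phase (back_propagate_by_gradient / back_propagate_by_err_input, which sum contributions into correction_acc) and an application phase (update_by_gradient / update_by_err_input, which perform one momentum-smoothed step via _update and then reset the accumulator). Below is a minimal standalone sketch of that flow; plain Lua numbers stand in for nerv's matrices, and the gconf values are arbitrary examples.

-- Illustrative sketch only: a single scalar weight replaces the real
-- matrix operations (trans:add / trans:mul) used by MatrixParam.
local gconf = {lrate = 0.1, momentum = 0.9, batch_size = 32, wcost = 1e-6}

local param = {
    trans = 1.0,          -- the trainable weight
    correction = 0.0,     -- momentum-smoothed correction
    correction_acc = 0.0, -- gradient accumulated during back_propagate
}

-- Accumulation phase: mirrors back_propagate_by_gradient().
local function accumulate(p, gradient)
    p.correction_acc = p.correction_acc + gradient
end

-- Application phase: mirrors _update(alpha, beta).
local function update(p, alpha, beta)
    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
    local n = gconf.batch_size * mmt_gain
    if gconf.momentum > 0 then
        -- correction = momentum * correction + accumulated gradient
        p.correction = gconf.momentum * p.correction + p.correction_acc
        p.trans = alpha * p.trans - gconf.lrate / n * beta * p.correction
    else
        p.trans = alpha * p.trans - gconf.lrate / n * beta * p.correction_acc
    end
    p.correction_acc = 0 -- reset, as _update() now does with fill(0)
end

accumulate(param, 0.5) -- e.g. one contribution from AffineLayer:back_propagate
local l2 = 1 - gconf.lrate * gconf.wcost -- L2 decay factor, as in update_by_err_input()
update(param, l2, l2)
print(param.trans, param.correction)

Because the update methods now take no gradient arguments, AffineLayer:update() can be invoked once per mini-batch after all back_propagate calls have filled correction_acc, rather than needing bp_err and input passed back in at update time.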