Diffstat (limited to 'nerv/layer')
-rw-r--r--   nerv/layer/affine.lua     | 37
-rw-r--r--   nerv/layer/combiner.lua   |  6
-rw-r--r--   nerv/layer/sigmoid.lua    |  8
-rw-r--r--   nerv/layer/softmax.lua    |  9
-rw-r--r--   nerv/layer/softmax_ce.lua |  8
5 files changed, 57 insertions, 11 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 00cbcfb..56a32f9 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -19,19 +19,19 @@ end
 
 function MatrixParam:update(gradient)
     local gconf = self.gconf
-    self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+    self.correction:add(self.correction, gradient, gconf.momentum, 1.0, nerv.context)
     -- momentum gain
     local mmt_gain = 1.0 / (1.0 - gconf.momentum);
     local n = self.gconf.batch_size * mmt_gain
     -- perform update
-    self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n)
+    self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n, nerv.context)
 end
 
 function LinearTransParam:update(gradient)
     MatrixParam.update(self, gradient)
     local gconf = self.gconf
     -- weight decay
-    self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost)
+    self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
 end
 
 function AffineLayer:__init(id, global_conf, layer_conf)
@@ -61,29 +61,50 @@ end
 
 function AffineLayer:update(bp_err, input, output)
+    --print(nerv.context)
     if self.direct_update then
-        self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
+        self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
         -- momentum gain
         local mmt_gain = 1.0 / (1.0 - gconf.momentum);
         local n = self.gconf.batch_size * mmt_gain
         -- perform update
-        self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n)
+        self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
     else
-        self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
+        self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N', nerv.context)
         self.ltp:update(self.ltp_grad)
     end
     self.bp:update(bp_err[1]:colsum())
 end
 
+function AffineLayer:gradient(bp_err, input, output)
+
+    self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
+    self.bp_grad = bp_err[1]:colsum()
+    self.bp.correction:add(self.bp.correction, self.bp_grad, gconf.momentum, 1.0, nerv.context)
+end
+
+function AffineLayer:update_gradient()
+    -- momentum gain
+    local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+    local n = self.gconf.batch_size * mmt_gain
+    -- perform update
+    self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
+    self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0, -gconf.lrate / n, nerv.context)
+
+    self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+    self.bp.trans:add(self.bp.trans, self.bp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+end
+
 function AffineLayer:propagate(input, output)
     -- apply linear transform
-    output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
+    --print(nerv.context)
+    output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N', nerv.context)
     -- add bias
     output[1]:add_row(self.bp.trans, 1.0)
 end
 
 function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
+    next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T', nerv.context)
 end
 
 function AffineLayer:get_params()
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 7bd7617..23cf1db 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -36,7 +36,7 @@ end
 function CombinerLayer:propagate(input, output)
     output[1]:fill(0)
     for i = 1, #self.dim_in do
-        output[1]:add(output[1], input[i], 1.0, self.lambda[i])
+        output[1]:add(output[1], input[i], 1.0, self.lambda[i], nerv.context)
     end
     for i = 2, #self.dim_out do
         output[i]:copy_fromd(output[1])
@@ -47,10 +47,10 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
     local sum = self.sum
     sum:copy_fromd(bp_err[1])
     for i = 2, #self.dim_out do
-        sum:add(sum, bp_err[i], 1.0, 1.0)
+        sum:add(sum, bp_err[i], 1.0, 1.0, nerv.context)
     end
     for i = 1, #self.dim_in do
-        next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i])
+        next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i], nerv.context)
     end
 end
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index dfd09eb..f6f1417 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -18,6 +18,14 @@ function SigmoidLayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SigmoidLayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SigmoidLayer:update_gradient()
+    -- no params, therefore do nothing
+end
+
 function SigmoidLayer:propagate(input, output)
     output[1]:sigmoid(input[1])
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index e979ebf..7e9c6f0 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -18,6 +18,15 @@ function SoftmaxLayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SoftmaxLayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SoftmaxLayer:update_gradient()
+    -- no params, therefore do nothing
+end
+
+
 function SoftmaxLayer:propagate(input, output)
     output[1]:softmax(input[1])
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index f878a2f..42adbc6 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -27,6 +27,14 @@ function SoftmaxCELayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SoftmaxCELayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SoftmaxCELayer:update_gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
 function SoftmaxCELayer:propagate(input, output)
     local softmax = self.softmax
     local ce = self.ce
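
The substantive change in affine.lua is a split of the single update() step into two phases: gradient() accumulates a momentum-scaled gradient into the parameters' correction buffers, and update_gradient() later applies the accumulated correction together with weight decay, with every matrix call now carrying a nerv.context argument. The other layers gain empty gradient()/update_gradient() stubs so they conform to the same interface. Below is a minimal sketch of how a training loop might drive this two-phase interface; the loop and the layers, inputs, outputs, and bp_errs tables are hypothetical and are not part of this commit.

-- Hypothetical driver loop (an assumption, not code from this diff): each
-- layer first accumulates its gradient for the current minibatch, then all
-- accumulated corrections are applied in a separate pass, mirroring the
-- gradient()/update_gradient() split introduced above.
for i = #layers, 1, -1 do
    -- bp_errs[i], inputs[i], outputs[i] are assumed to hold this layer's
    -- back-propagated error, input, and output for the current minibatch
    layers[i]:gradient(bp_errs[i], inputs[i], outputs[i])
end
for i = 1, #layers do
    -- applies the -lrate/n scaled correction and the weight-decay term, as in
    -- AffineLayer:update_gradient(); layers without parameters are no-ops
    layers[i]:update_gradient()
end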