Diffstat (limited to 'nerv/layer')
-rw-r--r--  nerv/layer/affine.lua      | 37
-rw-r--r--  nerv/layer/combiner.lua    |  6
-rw-r--r--  nerv/layer/sigmoid.lua     |  8
-rw-r--r--  nerv/layer/softmax.lua     |  9
-rw-r--r--  nerv/layer/softmax_ce.lua  |  8
5 files changed, 57 insertions, 11 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 00cbcfb..56a32f9 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -19,19 +19,19 @@ end
function MatrixParam:update(gradient)
local gconf = self.gconf
- self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+ self.correction:add(self.correction, gradient, gconf.momentum, 1.0, nerv.context)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n)
+ self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n, nerv.context)
end
function LinearTransParam:update(gradient)
MatrixParam.update(self, gradient)
local gconf = self.gconf
-- weight decay
- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost)
+ self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
end
function AffineLayer:__init(id, global_conf, layer_conf)
@@ -61,29 +61,50 @@ function AffineLayer:init(batch_size)
end
function AffineLayer:update(bp_err, input, output)
+ --print(nerv.context)
if self.direct_update then
- self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
+ self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n)
+ self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
else
- self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
+ self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N', nerv.context)
self.ltp:update(self.ltp_grad)
end
self.bp:update(bp_err[1]:colsum())
end
+function AffineLayer:gradient(bp_err, input, output)
+
+ self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
+ self.bp_grad = bp_err[1]:colsum()
+ self.bp.correction:add(self.bp.correction, self.bp_grad, gconf.momentum, 1.0, nerv.context)
+end
+
+function AffineLayer:update_gradient()
+ -- momentum gain
+ local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+ local n = self.gconf.batch_size * mmt_gain
+ -- perform update
+ self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
+ self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0, -gconf.lrate / n, nerv.context)
+
+ self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+ self.bp.trans:add(self.bp.trans, self.bp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+end
+
function AffineLayer:propagate(input, output)
-- apply linear transform
- output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
+ --print(nerv.context)
+ output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N', nerv.context)
-- add bias
output[1]:add_row(self.bp.trans, 1.0)
end
function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
- next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
+ next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T', nerv.context)
end
function AffineLayer:get_params()
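
The affine changes above split the old update() into two phases: gradient() accumulates a momentum-smoothed correction for the weights (plus the column-summed bias gradient), and update_gradient() later applies the learning-rate step and the weight-decay term. A minimal sketch of how a trainer might drive that pair, assuming a hypothetical layers array and per-layer err/inp/outp tables; this driver loop is not part of the patch:

    -- sketch only: drive the new accumulate/apply split across all layers
    local function train_step(layers, err, inp, outp)
        for i = 1, #layers do
            -- accumulate momentum-smoothed corrections for this mini-batch
            layers[i]:gradient(err[i], inp[i], outp[i])
        end
        for i = 1, #layers do
            -- apply -lrate / (batch_size * momentum gain), then weight decay
            layers[i]:update_gradient()
        end
    end
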
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 7bd7617..23cf1db 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -36,7 +36,7 @@ end
function CombinerLayer:propagate(input, output)
output[1]:fill(0)
for i = 1, #self.dim_in do
- output[1]:add(output[1], input[i], 1.0, self.lambda[i])
+ output[1]:add(output[1], input[i], 1.0, self.lambda[i], nerv.context)
end
for i = 2, #self.dim_out do
output[i]:copy_fromd(output[1])
@@ -47,10 +47,10 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
local sum = self.sum
sum:copy_fromd(bp_err[1])
for i = 2, #self.dim_out do
- sum:add(sum, bp_err[i], 1.0, 1.0)
+ sum:add(sum, bp_err[i], 1.0, 1.0, nerv.context)
end
for i = 1, #self.dim_in do
- next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i])
+ next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i], nerv.context)
end
end
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index dfd09eb..f6f1417 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -18,6 +18,14 @@ function SigmoidLayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SigmoidLayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SigmoidLayer:update_gradient()
+ -- no params, therefore do nothing
+end
+
function SigmoidLayer:propagate(input, output)
output[1]:sigmoid(input[1])
end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index e979ebf..7e9c6f0 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -18,6 +18,15 @@ function SoftmaxLayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SoftmaxLayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SoftmaxLayer:update_gradient()
+ -- no params, therefore do nothing
+end
+
+
function SoftmaxLayer:propagate(input, output)
output[1]:softmax(input[1])
end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index f878a2f..42adbc6 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -27,6 +27,14 @@ function SoftmaxCELayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SoftmaxCELayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SoftmaxCELayer:update_gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
function SoftmaxCELayer:propagate(input, output)
local softmax = self.softmax
local ce = self.ce
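
Note on the remaining hunks: the sigmoid, softmax, and softmax_ce layers gain the same gradient()/update_gradient() pair as empty stubs, mirroring their existing no-op update(). Presumably this is so a driver like the sketch after the affine diff can call the pair uniformly on every layer, with parameter-less layers simply doing nothing.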