aboutsummaryrefslogtreecommitdiff
path: root/nerv/layer
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/layer')
-rw-r--r-- nerv/layer/lstm.lua | 11
-rw-r--r-- nerv/layer/lstm_gate.lua | 7
2 files changed, 14 insertions, 4 deletions
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index 500bd87..b0cfe08 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -19,7 +19,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
return self.id .. '.' .. str
end
local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
- local dout1, dout2, dout3 = self.dim_out[1], self.dim_out[2], self.dim_out[3]
+ local dout1, dout2 = self.dim_out[1], self.dim_out[2]
local layers = {
["nerv.CombinerLayer"] = {
[ap("inputXDup")] = {{}, {dim_in = {din1},
@@ -49,11 +49,14 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
},
["nerv.LSTMGateLayer"] = {
[ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr}},
+ dim_out = {din3}, pr = pr},
+ param_type = {'N', 'N', 'D'}},
[ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr}},
+ dim_out = {din3}, pr = pr},
+ param_type = {'N', 'N', 'D'}},
[ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr}},
+ dim_out = {din3}, pr = pr},
+ param_type = {'N', 'N', 'D'}},
},
["nerv.ElemMulLayer"] = {
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 1963eba..8785b4f 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -5,12 +5,16 @@ function LSTMGateLayer:__init(id, global_conf, layer_conf)
self.id = id
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
+ self.param_type = layer_conf.param_type
self.gconf = global_conf
for i = 1, #self.dim_in do
self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
+ if self.param_type[i] == 'D' then
+ self["ltp" .. i].trans:diagonalize()
+ end
end
self.bp = self:find_param("bp", layer_conf, global_conf,
nerv.BiasParam, {1, self.dim_out[1]})
@@ -64,6 +68,9 @@ function LSTMGateLayer:update(bp_err, input, output)
self.err_bakm:sigmoid_grad(bp_err[1], output[1])
for i = 1, #self.dim_in do
self["ltp" .. i]:update_by_err_input(self.err_bakm, input[i])
+ if self.param_type[i] == 'D' then
+ self["ltp" .. i].trans:diagonalize()
+ end
end
self.bp:update_by_gradient(self.err_bakm:colsum())
end