about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/examples/lmptb/lmptb/lstm_t_v2.lua | 37
1 files changed, 17 insertions, 20 deletions
diff --git a/nerv/examples/lmptb/lmptb/lstm_t_v2.lua b/nerv/examples/lmptb/lmptb/lstm_t_v2.lua
index dc2fe45..e7bf3a7 100644
--- a/nerv/examples/lmptb/lmptb/lstm_t_v2.lua
+++ b/nerv/examples/lmptb/lmptb/lstm_t_v2.lua
@@ -1,5 +1,4 @@
-local LSTMLayerT = nerv.class('nerv.LSTMLayerTv2', 'nerv.LayerT')
---a version of LSTM that only feed h into the gates
+local LSTMLayerT = nerv.class('nerv.LSTMLayerV2T', 'nerv.LayerT')
function LSTMLayerT:__init(id, global_conf, layer_conf)
--input1:x input2:h input3:c
@@ -21,13 +20,13 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
local layers = {
["nerv.CombinerLayer"] = {
[ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
- ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+ ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
[ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
- ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
- [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
- ["dim_out"] = {self.dim_in[3]}, ["lambda"] = {1}}},
- [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]},
- ["dim_out"] = {self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1, 1}}},
+ ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+ --[ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
+ --["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
+ [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]},
+ ["lambda"] = {1, 1}}},
},
["nerv.AffineLayer"] = {
[ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
@@ -42,14 +41,14 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
[ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
- [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
- ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+ --[ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+ -- ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
},
["nerv.ElemMulLayer"] = {
[ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
[ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
- [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
+ --[ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
},
}
@@ -58,38 +57,36 @@ function LSTMLayerT:__init(id, global_conf, layer_conf)
local connections_t = {
["<input>[1]"] = ap("inputXDup[1]"),
["<input>[2]"] = ap("inputHDup[1]"),
- ["<input>[3]"] = ap("inputCDup[1]"),
[ap("inputXDup[1]")] = ap("mainAffineL[1]"),
[ap("inputHDup[1]")] = ap("mainAffineL[2]"),
-
[ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
[ap("inputXDup[2]")] = ap("inputGateL[1]"),
[ap("inputHDup[2]")] = ap("inputGateL[2]"),
+ --[ap("inputCDup[1]")] = ap("inputGateL[3]"),
[ap("inputXDup[3]")] = ap("forgetGateL[1]"),
[ap("inputHDup[3]")] = ap("forgetGateL[2]"),
+ --[ap("inputCDup[2]")] = ap("forgetGateL[3]"),
[ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
[ap("inputGateL[1]")] = ap("inputGMulL[2]"),
- [ap("inputCDup[1]")] = ap("forgetGMulL[1]"),
+ [ap("<input>[3]")] = ap("forgetGMulL[1]"),
[ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
[ap("inputGMulL[1]")] = ap("mainCDup[1]"),
[ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
- [ap("inputXDup[4]")] = ap("outputGateL[1]"),
- [ap("inputHDup[4]")] = ap("outputGateL[2]"),
+ --[ap("inputXDup[4]")] = ap("outputGateL[1]"),
+ --[ap("inputHDup[4]")] = ap("outputGateL[2]"),
+ --[ap("mainCDup[3]")] = ap("outputGateL[3]"),
[ap("mainCDup[2]")] = "<output>[2]",
[ap("mainCDup[1]")] = ap("outputTanhL[1]"),
- [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
- [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
-
- [ap("outputGMulL[1]")] = "<output>[1]",
+ [ap("outputTanhL[1]")] = "<output>[1]",
}
self.dagL = nerv.DAGLayerT(self.id, global_conf,
{["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,