diff options
-rw-r--r-- | nerv/examples/lmptb/lmptb/lstm_t_v2.lua | 37 |
1 files changed, 17 insertions, 20 deletions
diff --git a/nerv/examples/lmptb/lmptb/lstm_t_v2.lua b/nerv/examples/lmptb/lmptb/lstm_t_v2.lua index dc2fe45..e7bf3a7 100644 --- a/nerv/examples/lmptb/lmptb/lstm_t_v2.lua +++ b/nerv/examples/lmptb/lmptb/lstm_t_v2.lua @@ -1,5 +1,4 @@ -local LSTMLayerT = nerv.class('nerv.LSTMLayerTv2', 'nerv.LayerT') ---a version of LSTM that only feed h into the gates +local LSTMLayerT = nerv.class('nerv.LSTMLayerV2T', 'nerv.LayerT') function LSTMLayerT:__init(id, global_conf, layer_conf) --input1:x input2:h input3:c @@ -21,13 +20,13 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) local layers = { ["nerv.CombinerLayer"] = { [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, - ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, + ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3]}, ["lambda"] = {1}}}, - [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1, 1}}}, + ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, + --[ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]}, + --["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}}, + [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]}, + ["lambda"] = {1, 1}}}, }, ["nerv.AffineLayer"] = { [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, @@ -42,14 +41,14 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, [ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, - [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, + --[ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, + -- ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, }, ["nerv.ElemMulLayer"] = { [ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, [ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, - [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, + --[ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, }, } @@ -58,38 +57,36 @@ function LSTMLayerT:__init(id, global_conf, layer_conf) local connections_t = { ["<input>[1]"] = ap("inputXDup[1]"), ["<input>[2]"] = ap("inputHDup[1]"), - ["<input>[3]"] = ap("inputCDup[1]"), [ap("inputXDup[1]")] = ap("mainAffineL[1]"), [ap("inputHDup[1]")] = ap("mainAffineL[2]"), - [ap("mainAffineL[1]")] = ap("mainTanhL[1]"), [ap("inputXDup[2]")] = ap("inputGateL[1]"), [ap("inputHDup[2]")] = ap("inputGateL[2]"), + --[ap("inputCDup[1]")] = ap("inputGateL[3]"), [ap("inputXDup[3]")] = ap("forgetGateL[1]"), [ap("inputHDup[3]")] = ap("forgetGateL[2]"), + --[ap("inputCDup[2]")] = ap("forgetGateL[3]"), [ap("mainTanhL[1]")] = ap("inputGMulL[1]"), [ap("inputGateL[1]")] = ap("inputGMulL[2]"), - [ap("inputCDup[1]")] = ap("forgetGMulL[1]"), + [ap("<input>[3]")] = ap("forgetGMulL[1]"), [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"), [ap("inputGMulL[1]")] = ap("mainCDup[1]"), [ap("forgetGMulL[1]")] = ap("mainCDup[2]"), - [ap("inputXDup[4]")] = ap("outputGateL[1]"), - [ap("inputHDup[4]")] = ap("outputGateL[2]"), + --[ap("inputXDup[4]")] = ap("outputGateL[1]"), + --[ap("inputHDup[4]")] = ap("outputGateL[2]"), + --[ap("mainCDup[3]")] = ap("outputGateL[3]"), [ap("mainCDup[2]")] = "<output>[2]", [ap("mainCDup[1]")] = ap("outputTanhL[1]"), - [ap("outputTanhL[1]")] = ap("outputGMulL[1]"), - [ap("outputGateL[1]")] = ap("outputGMulL[2]"), - - [ap("outputGMulL[1]")] = "<output>[1]", + [ap("outputTanhL[1]")] = "<output>[1]", } self.dagL = nerv.DAGLayerT(self.id, global_conf, {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, |