From d13a7d17fd724f84423393e81dd3d62124f6669b Mon Sep 17 00:00:00 2001
From: txh18
Date: Thu, 26 Nov 2015 15:55:46 +0800
Subject: working on lstm

---
 nerv/examples/lmptb/tnn/layersT/lstm_t.lua | 76 +++++++++++++++++++++++++++---
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
index 5b653a3..4ec2e54 100644
--- a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
+++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
@@ -1,6 +1,6 @@
 local LSTMLayerT = nerv.class('nerv.LSTMLayerT', 'nerv.LayerT')
 
-function LSTMLayerT:__init(id, gilobal_conf, layer_conf)
+function LSTMLayerT:__init(id, global_conf, layer_conf)
     --input1:x input2:h input3:c
     self.id = id
     self.dim_in = layer_conf.dim_in
@@ -8,13 +8,77 @@ function LSTMLayerT:__init(id, gilobal_conf, layer_conf)
     self.gconf = global_conf
 
     --prepare a DAGLayerT to hold the lstm structure
-    local paramRepo = nerv.ParamRepo()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+
     local layers = {
-        ["nerv.IndRecurrentLayer"] = {
-            ["recurrentL1"] = recurrentLconfig,
-        }}
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}}},
+            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}}},
+            [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
+                ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3], self.dim_in[3]}}},
+            [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3]},
+                ["lambda"] = {1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+                ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+            [ap("outputTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+        },
+        ["nerv.GateFFFLayer"] = {
+            [ap("forgetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+                ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+            [ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
+                ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
+            [ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections_t = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+        ["<input>[3]"] = ap("inputCDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
+        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
+        [ap("inputCDup[1]")] = ap("mainAffineL[3]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
+        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
+        [ap("inputCDup[2]")] = ap("inputGateL[3]"),
+
+        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
+        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
+        [ap("inputCDup[3]")] = ap("forgetGateL[3]"),
+
+        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
+        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),
+
+        [ap("inputCDup[4]")] = ap("forgetGMulL[1]"),
+        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
+
+        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
+        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
+
+        [ap("mainCDup[2]")] = "<output>[2]",
+    }
 
-    self:check_dim_len(1, 1) -- exactly one input and one output
+    self:check_dim_len(3, 2) -- x, h, c and h, c
 end
 
 function LSTMLayerT:init(batch_size)
-- 
cgit v1.2.3