diff options
author | Determinant <[email protected]> | 2016-02-21 00:28:54 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2016-02-21 00:28:54 +0800 |
commit | 8f19acf152652ff887d3fe978e78a076dca60611 (patch) | |
tree | 8ced512733bca426d479f44320f15110090ac986 /nerv/layer/lstm.lua | |
parent | 620c1971c3c821337cd16cca20cddd27f7bc6085 (diff) |
add layers from `layersT/` to `layer/`
Diffstat (limited to 'nerv/layer/lstm.lua')
-rw-r--r-- | nerv/layer/lstm.lua | 140 |
1 files changed, 140 insertions, 0 deletions
-- Source: nerv/layer/lstm.lua (added as a new file by this commit).

-- LSTM layer registered under the name 'nerv.LSTMLayer', declared with
-- 'nerv.Layer' as the second argument to nerv.class (presumably the parent
-- class -- confirm against nerv.class).
--
-- The layer does no computation of its own: __init wires a set of sub-layers
-- into a nerv.DAGLayer stored in self.dag, and every other method simply
-- forwards to that DAG.
local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.Layer')

-- Construct the layer and the DAG that holds the LSTM structure.
--
-- Inputs of the layer:  [1] x, [2] h, [3] c
-- Outputs of the layer: [1] h, [2] c
--
-- id          : layer id; also used as the prefix of every sub-layer id.
-- global_conf : global configuration, stored in self.gconf and passed on to
--               nerv.LayerRepo and nerv.DAGLayer.
-- layer_conf  : table with fields
--                 dim_in  -- list of 3 input dimensions  (x, h, c)
--                 dim_out -- list of 2 output dimensions (h, c)
--                 pr      -- optional parameter repo; a fresh
--                            nerv.ParamRepo() is created when it is nil.
function LSTMLayer:__init(id, global_conf, layer_conf)
    self.id = id
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    self.gconf = global_conf

    -- Validate the number of inputs/outputs before any sub-layer is built, so
    -- a bad configuration is reported here rather than as a nil dimension
    -- inside one of the sub-layer constructors.
    -- NOTE(review): assumes check_dim_len only needs self.id, self.dim_in and
    -- self.dim_out, all of which are set above -- confirm against nerv.Layer.
    self:check_dim_len(3, 2) -- x, h, c and h, c

    -- prepare a DAGLayer to hold the lstm structure
    local pr = layer_conf.pr
    if pr == nil then
        pr = nerv.ParamRepo()
    end

    -- Prefix a sub-layer name (or port spec) with this layer's id.
    local function ap(str)
        return self.id .. '.' .. str
    end

    local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
    local dout1 = self.dim_out[1]

    -- NOTE(review): the wiring below connects ports declared with dout1 to
    -- ports declared with din3 (e.g. mainTanhL -> inputGMulL, and
    -- mainCDup -> outputTanhL), so it looks like dout1 is expected to equal
    -- din3 -- confirm with callers.
    local layers = {
        -- Fan-out / combination nodes.
        ["nerv.CombinerLayer"] = {
            -- x is consumed by the main affine and the three gates.
            [ap("inputXDup")] = {{}, {dim_in = {din1},
                                      dim_out = {din1, din1, din1, din1},
                                      lambda = {1}}},

            -- h is consumed by the main affine and the three gates.
            [ap("inputHDup")] = {{}, {dim_in = {din2},
                                      dim_out = {din2, din2, din2, din2},
                                      lambda = {1}}},

            -- Previous c is consumed by the input gate, the forget gate and
            -- the forget multiplication.
            [ap("inputCDup")] = {{}, {dim_in = {din3},
                                      dim_out = {din3, din3, din3},
                                      lambda = {1}}},

            -- Combines the two gated branches (lambda = {1, 1}) into the new
            -- cell value and fans it out three ways.
            [ap("mainCDup")] = {{}, {dim_in = {din3, din3},
                                     dim_out = {din3, din3, din3},
                                     lambda = {1, 1}}},
        },
        ["nerv.AffineLayer"] = {
            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
                                        dim_out = {dout1},
                                        pr = pr}},
        },
        ["nerv.TanhLayer"] = {
            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
            [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
        },
        -- Each gate sees (x, h, c).
        ["nerv.LSTMGateLayer"] = {
            [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
                                        dim_out = {din3}, pr = pr}},
            [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
                                       dim_out = {din3}, pr = pr}},
            [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
                                        dim_out = {din3}, pr = pr}},
        },
        ["nerv.ElemMulLayer"] = {
            [ap("inputGMulL")] = {{}, {dim_in = {din3, din3},
                                       dim_out = {din3}}},
            [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3},
                                        dim_out = {din3}}},
            [ap("outputGMulL")] = {{}, {dim_in = {din3, din3},
                                        dim_out = {din3}}},
        },
    }

    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)

    local connections = {
        -- Layer inputs feed the three fan-out nodes.
        ["<input>[1]"] = ap("inputXDup[1]"),
        ["<input>[2]"] = ap("inputHDup[1]"),
        ["<input>[3]"] = ap("inputCDup[1]"),

        -- Main branch: tanh(affine(x, h)).
        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),

        -- Input gate on (x, h, previous c).
        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
        [ap("inputCDup[1]")] = ap("inputGateL[3]"),

        -- Forget gate on (x, h, previous c).
        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
        [ap("inputCDup[2]")] = ap("forgetGateL[3]"),

        -- Main branch multiplied by the input gate.
        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),

        -- Previous c multiplied by the forget gate.
        [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),

        -- The two gated branches are combined into the new cell value.
        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),

        -- Output gate on (x, h, new c).
        [ap("inputXDup[4]")] = ap("outputGateL[1]"),
        [ap("inputHDup[4]")] = ap("outputGateL[2]"),
        [ap("mainCDup[3]")] = ap("outputGateL[3]"),

        -- New c is emitted as output 2 and also passed through tanh.
        [ap("mainCDup[2]")] = "<output>[2]",
        [ap("mainCDup[1]")] = ap("outputTanhL[1]"),

        -- Output 1 is tanh(new c) multiplied by the output gate.
        [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
        [ap("outputGateL[1]")] = ap("outputGMulL[2]"),

        [ap("outputGMulL[1]")] = "<output>[1]",
    }

    self.dag = nerv.DAGLayer(self.id, global_conf,
                             {dim_in = self.dim_in,
                              dim_out = self.dim_out,
                              sub_layers = layerRepo,
                              connections = connections})
end

-- Forward initialisation to the underlying DAG.
function LSTMLayer:init(batch_size, chunk_size)
    self.dag:init(batch_size, chunk_size)
end

-- Forward a batch-size change to the underlying DAG.
function LSTMLayer:batch_resize(batch_size, chunk_size)
    self.dag:batch_resize(batch_size, chunk_size)
end

-- Forward the parameter update to the underlying DAG.
function LSTMLayer:update(bp_err, input, output, t)
    self.dag:update(bp_err, input, output, t)
end

-- Forward propagation, delegated to the underlying DAG.
-- Returns nothing; any return value of the DAG call is discarded.
function LSTMLayer:propagate(input, output, t)
    self.dag:propagate(input, output, t)
end

-- Back propagation, delegated to the underlying DAG.
function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t)
    self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
end

-- Return whatever the underlying DAG reports as its parameters.
function LSTMLayer:get_params()
    return self.dag:get_params()
end