summaryrefslogtreecommitdiff
path: root/nerv/tnn/layersT/gru_t.lua
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/tnn/layersT/gru_t.lua')
-rw-r--r--nerv/tnn/layersT/gru_t.lua114
1 files changed, 114 insertions, 0 deletions
diff --git a/nerv/tnn/layersT/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
new file mode 100644
index 0000000..8f15cc8
--- /dev/null
+++ b/nerv/tnn/layersT/gru_t.lua
@@ -0,0 +1,114 @@
+local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
+
+function GRULayerT:__init(id, global_conf, layer_conf)
+ --input1:x input2:h input3:c(h^~)
+ self.id = id
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
+ self.gconf = global_conf
+
+ if self.dim_in[2] ~= self.dim_out[1] then
+ nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
+ end
+
+ --prepare a DAGLayerT to hold the lstm structure
+ local pr = layer_conf.pr
+ if pr == nil then
+ pr = nerv.ParamRepo()
+ end
+
+ local function ap(str)
+ return self.id .. '.' .. str
+ end
+
+ local layers = {
+ ["nerv.CombinerLayer"] = {
+ [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
+ ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+ [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+ ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+ [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+ ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+ [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
+ ["lambda"] = {1, -1, 1}}},
+ },
+ ["nerv.AffineLayer"] = {
+ [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
+ },
+ ["nerv.TanhLayer"] = {
+ [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+ },
+ ["nerv.GateFLayer"] = {
+ [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+ ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+ [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+ ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+ },
+ ["nerv.ElemMulLayer"] = {
+ [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+ [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+ [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+ },
+ }
+
+ local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+ local connections_t = {
+ ["<input>[1]"] = ap("inputXDup[1]"),
+ ["<input>[2]"] = ap("inputHDup[1]"),
+
+ [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+ [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+ [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+ [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+ [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+ [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+ [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+ [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+ [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+ [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+ [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+ [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+ [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+ [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+ [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+ [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+ [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+ [ap("updateMergeL[1]")] = "<output>[1]",
+ }
+
+ self.dagL = nerv.DAGLayerT(self.id, global_conf,
+ {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,
+ ["connections"] = connections_t})
+
+ self:check_dim_len(2, 1) -- x, h and h
+end
+
+function GRULayerT:init(batch_size, chunk_size)
+ self.dagL:init(batch_size, chunk_size)
+end
+
+function GRULayerT:batch_resize(batch_size, chunk_size)
+ self.dagL:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayerT:update(bp_err, input, output, t)
+ self.dagL:update(bp_err, input, output, t)
+end
+
+function GRULayerT:propagate(input, output, t)
+ self.dagL:propagate(input, output, t)
+end
+
+function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
+ self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayerT:get_params()
+ return self.dagL:get_params()
+end