From 3f84267f9fe035ac7ed305a7d66ecab9f7ae50e3 Mon Sep 17 00:00:00 2001
From: txh18
Date: Fri, 29 Jan 2016 17:38:05 +0800
Subject: moved gru_t to tnn/layersT

---
 nerv/Makefile                             |   2 +-
 nerv/examples/lmptb/lmptb/layer/gru_t.lua | 114 ------------------------------
 nerv/examples/lmptb/lmptb/layer/init.lua  |   2 +-
 nerv/tnn/init.lua                         |   1 +
 nerv/tnn/layersT/gru_t.lua                | 114 ++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 116 deletions(-)
 delete mode 100644 nerv/examples/lmptb/lmptb/layer/gru_t.lua
 create mode 100644 nerv/tnn/layersT/gru_t.lua

diff --git a/nerv/Makefile b/nerv/Makefile
index 5c329f9..db5df22 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
 	nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
 	io/sgd_buffer.lua \
 	tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
-	tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua
+	tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/gru_t.lua tnn/layersT/softmax_ce_t.lua
 
 INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
 #CUDA_BASE := /usr/local/cuda-7.0
diff --git a/nerv/examples/lmptb/lmptb/layer/gru_t.lua b/nerv/examples/lmptb/lmptb/layer/gru_t.lua
deleted file mode 100644
index 8f15cc8..0000000
--- a/nerv/examples/lmptb/lmptb/layer/gru_t.lua
+++ /dev/null
@@ -1,114 +0,0 @@
-local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
-
-function GRULayerT:__init(id, global_conf, layer_conf)
-    --input1:x input2:h input3:c(h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
-    if self.dim_in[2] ~= self.dim_out[1] then
-        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
-    end
-
-    --prepare a DAGLayerT to hold the lstm structure
-    local pr = layer_conf.pr
-    if pr == nil then
-        pr = nerv.ParamRepo()
-    end
-
-    local function ap(str)
-        return self.id .. '.' .. str
-    end
-
-    local layers = {
-        ["nerv.CombinerLayer"] = {
-            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
-                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
-            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
-            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
-            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
-                ["lambda"] = {1, -1, 1}}},
-        },
-        ["nerv.AffineLayer"] = {
-            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
-        },
-        ["nerv.TanhLayer"] = {
-            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
-        },
-        ["nerv.GateFLayer"] = {
-            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
-            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
-        },
-        ["nerv.ElemMulLayer"] = {
-            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-        },
-    }
-
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
-
-    local connections_t = {
-        ["<input>[1]"] = ap("inputXDup[1]"),
-        ["<input>[2]"] = ap("inputHDup[1]"),
-
-        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
-        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
-        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
-        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
-        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
-
-        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
-        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
-
-        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
-        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
-        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
-
-        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
-        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
-        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
-        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
-
-        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
-        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
-        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
-
-        [ap("updateMergeL[1]")] = "<output>[1]",
-    }
-
-    self.dagL = nerv.DAGLayerT(self.id, global_conf,
-            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,
-                ["connections"] = connections_t})
-
-    self:check_dim_len(2, 1) -- x, h and h
-end
-
-function GRULayerT:init(batch_size, chunk_size)
-    self.dagL:init(batch_size, chunk_size)
-end
-
-function GRULayerT:batch_resize(batch_size, chunk_size)
-    self.dagL:batch_resize(batch_size, chunk_size)
-end
-
-function GRULayerT:update(bp_err, input, output, t)
-    self.dagL:update(bp_err, input, output, t)
-end
-
-function GRULayerT:propagate(input, output, t)
-    self.dagL:propagate(input, output, t)
-end
-
-function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
-    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
-end
-
-function GRULayerT:get_params()
-    return self.dagL:get_params()
-end
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index b345244..ceae009 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,6 +1,6 @@
 require 'lmptb.layer.select_linear'
 require 'lmptb.layer.affine_recurrent_plusvec'
-require 'lmptb.layer.gru_t'
+--require 'lmptb.layer.gru_t'
 require 'lmptb.layer.lm_affine_recurrent'
 
 
diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua
index b375fa8..7faca31 100644
--- a/nerv/tnn/init.lua
+++ b/nerv/tnn/init.lua
@@ -47,5 +47,6 @@ nerv.include('sutil.lua')
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
 nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/gru_t.lua')
 nerv.include('layersT/dropout_t.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/tnn/layersT/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
new file mode 100644
index 0000000..8f15cc8
--- /dev/null
+++ b/nerv/tnn/layersT/gru_t.lua
@@ -0,0 +1,114 @@
+local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
+
+function GRULayerT:__init(id, global_conf, layer_conf)
+    --input1:x input2:h input3:c(h^~)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
+    end
+
+    --prepare a DAGLayerT to hold the lstm structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
+                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
+                ["lambda"] = {1, -1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections_t = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dagL = nerv.DAGLayerT(self.id, global_conf,
+            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,
+                ["connections"] = connections_t})
+
+    self:check_dim_len(2, 1) -- x, h and h
+end
+
+function GRULayerT:init(batch_size, chunk_size)
+    self.dagL:init(batch_size, chunk_size)
+end
+
+function GRULayerT:batch_resize(batch_size, chunk_size)
+    self.dagL:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayerT:update(bp_err, input, output, t)
+    self.dagL:update(bp_err, input, output, t)
+end
+
+function GRULayerT:propagate(input, output, t)
+    self.dagL:propagate(input, output, t)
+end
+
+function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
+    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayerT:get_params()
+    return self.dagL:get_params()
+end
-- 
cgit v1.2.3-70-g09d2