--- GRU (gated recurrent unit) layer for time-unrolled networks.
-- The cell itself is not implemented here: the constructor wires a set of
-- primitive sub-layers into a nerv.DAGLayerT and every runtime method simply
-- forwards to that DAG.
local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')

--- Construct the layer and its internal DAG.
-- Ports: input [1] = x, input [2] = h (previous hidden state);
-- output [1] = new hidden state.
-- @param id           layer id; also used as the prefix of every sub-layer id
-- @param global_conf  global configuration, handed to the sub-layers and DAG
-- @param layer_conf   table with `dim_in` (2 entries), `dim_out` (1 entry)
--                     and an optional `pr` (parameter repo)
function GRULayerT:__init(id, global_conf, layer_conf)
    self.id = id
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    self.gconf = global_conf

    -- Check the port counts first (inputs: x, h; output: h) so that
    -- dim_in[2] / dim_out[1] are known to exist before they are read below.
    -- NOTE(review): check_dim_len is inherited; presumably it rejects any
    -- config that does not have exactly 2 inputs and 1 output -- confirm.
    self:check_dim_len(2, 1)

    -- The hidden state is fed back as input [2], so both must have one size.
    if self.dim_in[2] ~= self.dim_out[1] then
        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                   self.dim_in[2], self.dim_out[1])
    end

    -- Prepare a DAGLayerT to hold the GRU structure.
    local pr = layer_conf.pr
    if pr == nil then
        pr = nerv.ParamRepo()
    end

    -- Sub-layer ids are namespaced with this layer's id.
    local function ap(str)
        return self.id .. '.' .. str
    end

    local x_dim = self.dim_in[1]
    local h_dim = self.dim_in[2]
    local out_dim = self.dim_out[1]

    local layers = {
        ["nerv.CombinerLayer"] = {
            -- Fan-out of x to: reset gate, update gate, candidate affine.
            [ap("inputXDup")] = {{}, {
                dim_in = {x_dim},
                dim_out = {x_dim, x_dim, x_dim},
                lambda = {1},
            }},
            -- Fan-out of h to: reset gate, update gate, reset multiply,
            -- update multiply, final merge.
            [ap("inputHDup")] = {{}, {
                dim_in = {h_dim},
                dim_out = {h_dim, h_dim, h_dim, h_dim, h_dim},
                lambda = {1},
            }},
            -- Fan-out of the update gate to its two multiplications.
            [ap("updateGDup")] = {{}, {
                dim_in = {h_dim},
                dim_out = {h_dim, h_dim},
                lambda = {1},
            }},
            -- Final merge of (h, z*h, z*c) with weights (1, -1, 1).
            -- Assuming CombinerLayer forms the lambda-weighted sum of its
            -- inputs, this yields h - z*h + z*c = (1 - z)*h + z*c.
            [ap("updateMergeL")] = {{}, {
                dim_in = {h_dim, h_dim, h_dim},
                dim_out = {out_dim},
                lambda = {1, -1, 1},
            }},
        },
        ["nerv.AffineLayer"] = {
            -- Candidate pre-activation from x and the reset-gated h.
            [ap("mainAffineL")] = {{}, {
                dim_in = {x_dim, h_dim},
                dim_out = {out_dim},
                pr = pr,
            }},
        },
        ["nerv.TanhLayer"] = {
            [ap("mainTanhL")] = {{}, {
                dim_in = {out_dim},
                dim_out = {out_dim},
            }},
        },
        ["nerv.GateFLayer"] = {
            [ap("resetGateL")] = {{}, {
                dim_in = {x_dim, h_dim},
                dim_out = {h_dim},
                pr = pr,
            }},
            [ap("updateGateL")] = {{}, {
                dim_in = {x_dim, h_dim},
                dim_out = {h_dim},
                pr = pr,
            }},
        },
        ["nerv.ElemMulLayer"] = {
            -- r * h
            [ap("resetGMulL")] = {{}, {
                dim_in = {h_dim, h_dim},
                dim_out = {h_dim},
            }},
            -- z * c (c is the tanh candidate)
            [ap("updateGMulCL")] = {{}, {
                dim_in = {h_dim, h_dim},
                dim_out = {h_dim},
            }},
            -- z * h
            [ap("updateGMulHL")] = {{}, {
                dim_in = {h_dim, h_dim},
                dim_out = {h_dim},
            }},
        },
    }

    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)

    -- Keys are source ports, values are destination ports; "[k]" on its own
    -- names a port of the DAG itself.
    local connections_t = {
        -- DAG inputs
        ["[1]"] = ap("inputXDup[1]"),
        ["[2]"] = ap("inputHDup[1]"),

        -- reset gate and update gate both see (x, h)
        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
        [ap("updateGateL[1]")] = ap("updateGDup[1]"),

        -- candidate: tanh(affine(x, r * h))
        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),

        -- z * h and z * c
        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),

        -- merge into the DAG output
        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
        [ap("updateMergeL[1]")] = "[1]",
    }

    self.dagL = nerv.DAGLayerT(self.id, global_conf, {
        dim_in = self.dim_in,
        dim_out = self.dim_out,
        sub_layers = layerRepo,
        connections = connections_t,
    })
end

--- Forward initialisation to the internal DAG.
function GRULayerT:init(batch_size, chunk_size)
    self.dagL:init(batch_size, chunk_size)
end

--- Forward a batch/chunk resize to the internal DAG.
function GRULayerT:batch_resize(batch_size, chunk_size)
    self.dagL:batch_resize(batch_size, chunk_size)
end

--- Forward the parameter update for time step `t` to the internal DAG.
function GRULayerT:update(bp_err, input, output, t)
    self.dagL:update(bp_err, input, output, t)
end

--- Forward the forward pass for time step `t` to the internal DAG.
function GRULayerT:propagate(input, output, t)
    self.dagL:propagate(input, output, t)
end

--- Forward the backward pass for time step `t` to the internal DAG.
function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
end

--- Return whatever the internal DAG reports as its parameters.
function GRULayerT:get_params()
    return self.dagL:get_params()
end