From 75a2d6a2a08caf987017f5a9043ac93afcd70980 Mon Sep 17 00:00:00 2001
From: txh18
Date: Wed, 25 Nov 2015 23:42:37 +0800
Subject: changed auto-generating params; they are no longer saved into
 global_conf.paramRepo

---
 nerv/Makefile                              |  2 +-
 nerv/examples/lmptb/tnn/init.lua           |  1 +
 nerv/examples/lmptb/tnn/layersT/lstm.lua   | 56 ------------------------------
 nerv/examples/lmptb/tnn/layersT/lstm_t.lua | 50 ++++++++++++++++++++++++++
 nerv/examples/lmptb/tnn/tnn.lua            |  4 +--
 nerv/examples/lmptb/tnn_ptb_main.lua       | 13 +++----
 nerv/layer/affine.lua                      |  1 +
 nerv/layer/init.lua                        | 12 +++----
 nerv/layer/tanh.lua                        | 35 +++++++++++++++++++
 9 files changed, 103 insertions(+), 71 deletions(-)
 delete mode 100644 nerv/examples/lmptb/tnn/layersT/lstm.lua
 create mode 100644 nerv/examples/lmptb/tnn/layersT/lstm_t.lua
 create mode 100644 nerv/layer/tanh.lua

diff --git a/nerv/Makefile b/nerv/Makefile
index 55c174c..f154cc3 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -30,7 +30,7 @@ LUAT_OBJS := $(addprefix $(OBJ_DIR)/,$(LUAT_OBJS))
 OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS)
 LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
 LUA_LIBS := matrix/init.lua io/init.lua init.lua \
-        layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \
+        layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
         layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \
         nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
         io/sgd_buffer.lua
diff --git a/nerv/examples/lmptb/tnn/init.lua b/nerv/examples/lmptb/tnn/init.lua
index a7a377e..d45a2fa 100644
--- a/nerv/examples/lmptb/tnn/init.lua
+++ b/nerv/examples/lmptb/tnn/init.lua
@@ -43,6 +43,7 @@ end
 
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
+nerv.include('layersT/lstm_t.lua')
 nerv.include('layers/elem_mul.lua')
 nerv.include('layers/gate_fff.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/tnn/layersT/lstm.lua b/nerv/examples/lmptb/tnn/layersT/lstm.lua
deleted file mode 100644
index 0da1f38..0000000
--- a/nerv/examples/lmptb/tnn/layersT/lstm.lua
+++ /dev/null
@@ -1,56 +0,0 @@
-local LSTMLayerT = nerv.class('nerv.LSTMLayerT', 'nerv.LayerT')
-
-function LSTMLayerT:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
-    --prepare a DAGLayerT to hold the lstm structure
-    local paramRepo = nerv.ParamRepo()
-    local layers = {
-        ["nerv.IndRecurrentLayer"] = {
-            ["recurrentL1"] = recurrentLconfig,
-        }}
-
-    self:check_dim_len(1, 1) -- exactly one input and one output
-end
-
-function LSTMLayerT:init(batch_size)
-    if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
-        nerv.error("mismatching dimensions of linear transform and bias paramter")
-    end
-    if self.dim_in[1] ~= self.ltp.trans:nrow() then
-        nerv.error("mismatching dimensions of linear transform parameter and input")
-    end
-    if self.dim_out[1] ~= self.ltp.trans:ncol() then
-        nerv.error("mismatching dimensions of linear transform parameter and output")
-    end
-    self.ltp_grad = self.ltp.trans:create()
-    self.ltp:train_init()
-    self.bp:train_init()
-end
-
-function LSTMLayerT:batch_resize(batch_size)
-    -- do nothing
-end
-
-function AffineLayer:update(bp_err, input, output)
-    self.ltp:update_by_err_input(bp_err[1], input[1])
-    self.bp:update_by_gradient(bp_err[1]:colsum())
-end
-
-function AffineLayer:propagate(input, output)
-    -- apply linear transform
-    output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
-    -- add bias
-    output[1]:add_row(self.bp.trans, 1.0)
-end
-
-function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
-end
-
-function AffineLayer:get_params()
-    return nerv.ParamRepo({self.ltp, self.bp})
-end
diff --git a/nerv/examples/lmptb/tnn/layersT/lstm_t.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
new file mode 100644
index 0000000..5b653a3
--- /dev/null
+++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
@@ -0,0 +1,50 @@
+local LSTMLayerT = nerv.class('nerv.LSTMLayerT', 'nerv.LayerT')
+
+function LSTMLayerT:__init(id, global_conf, layer_conf)
+    --input1:x input2:h input3:c
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    --prepare a DAGLayerT to hold the lstm structure
+    local paramRepo = nerv.ParamRepo()
+    local layers = {
+        ["nerv.IndRecurrentLayer"] = {
+            ["recurrentL1"] = recurrentLconfig,
+        }}
+
+    self:check_dim_len(1, 1) -- exactly one input and one output
+end
+
+function LSTMLayerT:init(batch_size)
+    if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform and bias parameter")
+    end
+    if self.dim_in[1] ~= self.ltp.trans:nrow() then
+        nerv.error("mismatching dimensions of linear transform parameter and input")
+    end
+    if self.dim_out[1] ~= self.ltp.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform parameter and output")
+    end
+    self.ltp_grad = self.ltp.trans:create()
+    self.ltp:train_init()
+    self.bp:train_init()
+end
+
+function LSTMLayerT:batch_resize(batch_size)
+    -- do nothing
+end
+
+function LSTMLayerT:update(bp_err, input, output)
+end
+
+function LSTMLayerT:propagate(input, output)
+end
+
+function LSTMLayerT:back_propagate(bp_err, next_bp_err, input, output)
+end
+
+function LSTMLayerT:get_params()
+    return nerv.ParamRepo({self.ltp, self.bp})
+end
diff --git a/nerv/examples/lmptb/tnn/tnn.lua b/nerv/examples/lmptb/tnn/tnn.lua
index c2e397c..c87f963 100644
--- a/nerv/examples/lmptb/tnn/tnn.lua
+++ b/nerv/examples/lmptb/tnn/tnn.lua
@@ -1,4 +1,4 @@
-local TNN = nerv.class("nerv.TNN", "nerv.Layer")
+local TNN = nerv.class("nerv.TNN")
 
 local function parse_id(str)
     --used to parse layerid[portid],time
@@ -541,7 +541,7 @@ end
 --Return: nerv.ParamRepo
 function TNN:get_params()
     local param_repos = {}
-    for id, ref in pairs(self.queue) do
+    for id, ref in pairs(self.layers) do
         table.insert(param_repos, ref.layer:get_params())
     end
     return nerv.ParamRepo.merge(param_repos)
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index 9156b61..16024a8 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -69,12 +69,12 @@ end
 function prepare_layers(global_conf)
     printf("%s preparing layers...\n", global_conf.sche_log_pre)
 
-    local paramRepo = global_conf.paramRepo
+    local pr = global_conf.paramRepo
 
     local du = false
 
     --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
-    local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du}}
+    local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
 
     local layers = {
         ["nerv.AffineRecurrentLayer"] = {
@@ -82,7 +82,7 @@
         },
 
         ["nerv.SelectLinearLayer"] = {
-            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab}},
+            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
         },
 
         ["nerv.SigmoidLayer"] = {
@@ -94,7 +94,7 @@
         },
 
         ["nerv.AffineLayer"] = {
-            ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du}},
+            ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}},
         },
 
         ["nerv.SoftmaxCELayerT"] = {
@@ -111,7 +111,7 @@
     end
     --]]
 
-    local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
     printf("%s preparing layers end.\n", global_conf.sche_log_pre)
     return layerRepo
 end
@@ -220,7 +220,7 @@ global_conf = {
     test_fn = test_fn,
     vocab_fn = vocab_fn,
     sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 40000, --give a message when log_w_num words have been processed
+    log_w_num = 400000, --give a message when log_w_num words have been processed
     timer = nerv.Timer(),
     work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test'
 }
@@ -305,6 +305,7 @@ end
 if start_iter == -1 or start_iter == 0 then
     print("===INITIAL VALIDATION===")
     local tnn = load_net(global_conf, 0)
+    global_conf.paramRepo = tnn:get_params() --get auto-generated params
     global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
     local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
     nerv.LMUtil.wait(1)
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index d56fcb8..566e9bc 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -127,4 +127,5 @@ function AffineLayer:get_params()
     for i = 2, #self.dim_in do
         pr:add(self["ltp" .. i].id, self["ltp" .. i])
     end
+    return pr
 end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index c6d0a98..b8b7ea1 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -72,27 +72,27 @@ end
 
 function Layer:find_param(pid, l_conf, gconf, p_type, p_dim)
     if l_conf[pid] ~= nil then
-        nerv.printf("Param [%s] of layer [%s] found in layer_conf.\n", pid, self.id)
+        nerv.info("Param [%s] of layer [%s] found in layer_conf.\n", pid, self.id)
         return l_conf[pid]
     end
     local pid_g = self.id .. '_' .. pid --global identifier
-    local pr = gconf.paramRepo
+    local pr = l_conf.pr
     local p
-    if pr:has_param(pid_g) == true then
-        nerv.printf("Param [%s] of layer [%s] found in paramRepo.\n", pid, self.id)
+    if pr ~= nil and pr:has_param(pid_g) == true then
+        nerv.info("Param [%s] of layer [%s] found in layer_conf.paramRepo.\n", pid, self.id)
         p = pr:get_param(pid_g)
         return p
     end
-    nerv.printf("Param [%s] of layer [%s] is not found in layer_conf or paramRepo, switch to auto-generate.\n", pid, self.id)
+    nerv.info("Param [%s] of layer [%s] is not found in layer_conf or layer_conf.paramRepo, switch to auto-generate.\n", pid, self.id)
     p = p_type(pid_g, gconf)
     p.trans = gconf.cumat_type(unpack(p_dim))
     p.trans:generate(gconf.param_random)
-    pr:add(pid_g, p) --add the parameter into the paramRepo
     return p
 end
 
 nerv.include('affine.lua')
 nerv.include('sigmoid.lua')
+nerv.include('tanh.lua')
 nerv.include('softmax_ce.lua')
 nerv.include('bias.lua')
 nerv.include('window.lua')
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
new file mode 100644
index 0000000..e1c32f2
--- /dev/null
+++ b/nerv/layer/tanh.lua
@@ -0,0 +1,35 @@
+local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
+
+function TanhLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.gconf = global_conf
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1)
+end
+
+function TanhLayer:init()
+    if self.dim_in[1] ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+end
+
+function TanhLayer:batch_resize(batch_size)
+    -- do nothing
+end
+
+function TanhLayer:update(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function TanhLayer:propagate(input, output)
+    output[1]:tanh(input[1])
+end
+
+function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
+    next_bp_err[1]:tanh_grad(bp_err[1], output[1])
+end
+
+function TanhLayer:get_params()
+    return nerv.ParamRepo({})
+end
--
cgit v1.2.3-70-g09d2
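
Note: the behavioural core of this patch is that Layer:find_param no longer registers
auto-generated parameters in global_conf.paramRepo; it now looks a parameter up in
layer_conf first, then in layer_conf.pr, and only then generates one that stays private
to the layer, which is why tnn_ptb_main.lua now rebuilds the repo with tnn:get_params()
before exporting. The Lua sketch below restates that lookup order from a layer's point
of view; it is illustrative only, not code from this commit (MyLayer, the "ltp" id, and
the use of nerv.LinearTransParam are assumed names).

    -- Minimal sketch of the parameter flow after this patch (assumed names:
    -- MyLayer, "ltp", nerv.LinearTransParam; find_param/get_params are from the patch).
    local MyLayer = nerv.class("nerv.MyLayer", "nerv.Layer")

    function MyLayer:__init(id, global_conf, layer_conf)
        self.id = id
        self.gconf = global_conf
        self.dim_in = layer_conf.dim_in
        self.dim_out = layer_conf.dim_out
        -- lookup order: layer_conf["ltp"] -> layer_conf.pr -> auto-generate;
        -- an auto-generated param is no longer added to gconf.paramRepo
        self.ltp = self:find_param("ltp", layer_conf, global_conf,
                                   nerv.LinearTransParam,
                                   {self.dim_in[1], self.dim_out[1]})
        self:check_dim_len(1, 1)
    end

    function MyLayer:get_params()
        -- auto-generated params can now only be recovered through get_params,
        -- e.g. via TNN:get_params() before ParamRepo:export, as tnn_ptb_main.lua does
        return nerv.ParamRepo({self.ltp})
    end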