-rw-r--r--  nerv/Makefile                                   2
-rw-r--r--  nerv/examples/lmptb/tnn/init.lua                1
-rw-r--r--  nerv/examples/lmptb/tnn/layersT/lstm_t.lua (renamed from nerv/examples/lmptb/tnn/layersT/lstm.lua)  18
-rw-r--r--  nerv/examples/lmptb/tnn/tnn.lua                 4
-rw-r--r--  nerv/examples/lmptb/tnn_ptb_main.lua           13
-rw-r--r--  nerv/layer/affine.lua                           1
-rw-r--r--  nerv/layer/init.lua                            12
-rw-r--r--  nerv/layer/tanh.lua                            35
8 files changed, 59 insertions, 27 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index 55c174c..f154cc3 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -30,7 +30,7 @@ LUAT_OBJS := $(addprefix $(OBJ_DIR)/,$(LUAT_OBJS))
OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS)
LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
LUA_LIBS := matrix/init.lua io/init.lua init.lua \
- layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \
+ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
io/sgd_buffer.lua
diff --git a/nerv/examples/lmptb/tnn/init.lua b/nerv/examples/lmptb/tnn/init.lua
index a7a377e..d45a2fa 100644
--- a/nerv/examples/lmptb/tnn/init.lua
+++ b/nerv/examples/lmptb/tnn/init.lua
@@ -43,6 +43,7 @@ end
nerv.include('tnn.lua')
nerv.include('layersT/softmax_ce_t.lua')
+nerv.include('layersT/lstm_t.lua')
nerv.include('layers/elem_mul.lua')
nerv.include('layers/gate_fff.lua')
nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/tnn/layersT/lstm.lua b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
index 0da1f38..5b653a3 100644
--- a/nerv/examples/lmptb/tnn/layersT/lstm.lua
+++ b/nerv/examples/lmptb/tnn/layersT/lstm_t.lua
@@ -1,6 +1,7 @@
local LSTMLayerT = nerv.class('nerv.LSTMLayerT', 'nerv.LayerT')
-function LSTMLayerT:__init(id, global_conf, layer_conf)
+function LSTMLayerT:__init(id, global_conf, layer_conf)
+    -- inputs: [1] = x, [2] = h, [3] = c
self.id = id
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
@@ -35,22 +36,15 @@ function LSTMLayerT:batch_resize(batch_size)
-- do nothing
end
-function AffineLayer:update(bp_err, input, output)
- self.ltp:update_by_err_input(bp_err[1], input[1])
- self.bp:update_by_gradient(bp_err[1]:colsum())
+function LSTMLayerT:update(bp_err, input, output)
end
-function AffineLayer:propagate(input, output)
- -- apply linear transform
- output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
- -- add bias
- output[1]:add_row(self.bp.trans, 1.0)
+function LSTMLayerT:propagate(input, output)
end
-function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
- next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
+function LSTMLayerT:back_propagate(bp_err, next_bp_err, input, output)
end
-function AffineLayer:get_params()
+function LSTMLayerT:get_params()
return nerv.ParamRepo({self.ltp, self.bp})
end
diff --git a/nerv/examples/lmptb/tnn/tnn.lua b/nerv/examples/lmptb/tnn/tnn.lua
index c2e397c..c87f963 100644
--- a/nerv/examples/lmptb/tnn/tnn.lua
+++ b/nerv/examples/lmptb/tnn/tnn.lua
@@ -1,4 +1,4 @@
-local TNN = nerv.class("nerv.TNN", "nerv.Layer")
+local TNN = nerv.class("nerv.TNN")
local function parse_id(str)
--used to parse layerid[portid],time
@@ -541,7 +541,7 @@ end
--Return: nerv.ParamRepo
function TNN:get_params()
local param_repos = {}
- for id, ref in pairs(self.queue) do
+ for id, ref in pairs(self.layers) do
table.insert(param_repos, ref.layer:get_params())
end
return nerv.ParamRepo.merge(param_repos)
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index 9156b61..16024a8 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -69,12 +69,12 @@ end
function prepare_layers(global_conf)
printf("%s preparing layers...\n", global_conf.sche_log_pre)
- local paramRepo = global_conf.paramRepo
+ local pr = global_conf.paramRepo
local du = false
--local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
- local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du}}
+ local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
local layers = {
["nerv.AffineRecurrentLayer"] = {
@@ -82,7 +82,7 @@ function prepare_layers(global_conf)
},
["nerv.SelectLinearLayer"] = {
- ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab}},
+ ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
},
["nerv.SigmoidLayer"] = {
@@ -94,7 +94,7 @@ function prepare_layers(global_conf)
},
["nerv.AffineLayer"] = {
- ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du}},
+ ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}},
},
["nerv.SoftmaxCELayerT"] = {
@@ -111,7 +111,7 @@ function prepare_layers(global_conf)
end
--]]
- local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
+ local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
printf("%s preparing layers end.\n", global_conf.sche_log_pre)
return layerRepo
end
@@ -220,7 +220,7 @@ global_conf = {
test_fn = test_fn,
vocab_fn = vocab_fn,
sche_log_pre = "[SCHEDULER]:",
- log_w_num = 40000, --give a message when log_w_num words have been processed
+ log_w_num = 400000, --give a message when log_w_num words have been processed
timer = nerv.Timer(),
work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test'
}
@@ -305,6 +305,7 @@ end
if start_iter == -1 or start_iter == 0 then
print("===INITIAL VALIDATION===")
local tnn = load_net(global_conf, 0)
+    global_conf.paramRepo = tnn:get_params() --get auto-generated params
global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file
local result = LMTrainer.lm_process_file(global_conf, global_conf.valid_fn, tnn, false) --false update!
nerv.LMUtil.wait(1)
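
A note on the two lines added above: since Layer:find_param no longer registers auto-generated parameters into a repo (see the nerv/layer/init.lua hunk below), the scheduler has to re-collect them from the network before exporting. A minimal sketch of that flow in Lua, reusing names from this file (load_net is the example's own helper):

-- Parameter flow after this change: layers may auto-generate params while the
-- net is built; they live only inside the layers until collected explicitly.
local tnn = load_net(global_conf, 0)               -- build the net; params may be auto-generated here
global_conf.paramRepo = tnn:get_params()           -- gather every layer's params into one repo
global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil)  -- save them for later iterations
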
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index d56fcb8..566e9bc 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -127,4 +127,5 @@ function AffineLayer:get_params()
for i = 2, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
+ return pr
end
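
The added return above matters because AffineLayer:get_params previously fell off the end of the function and returned nil, so callers that merge per-layer repos silently lost the affine transform and bias. A hedged usage sketch (only calls that already appear in this diff; output_layer is a hypothetical AffineLayer instance):

-- With `return pr` in place, merging per-layer repos picks up the affine
-- parameters instead of dropping a nil result.
local repos = {}
table.insert(repos, output_layer:get_params())   -- now a nerv.ParamRepo, not nil
local merged = nerv.ParamRepo.merge(repos)
merged:export(global_conf.param_fn .. '.0', nil)
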
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index c6d0a98..b8b7ea1 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -72,27 +72,27 @@ end
function Layer:find_param(pid, l_conf, gconf, p_type, p_dim)
if l_conf[pid] ~= nil then
- nerv.printf("Param [%s] of layer [%s] found in layer_conf.\n", pid, self.id)
+ nerv.info("Param [%s] of layer [%s] found in layer_conf.\n", pid, self.id)
return l_conf[pid]
end
local pid_g = self.id .. '_' .. pid --global identifier
- local pr = gconf.paramRepo
+ local pr = l_conf.pr
local p
- if pr:has_param(pid_g) == true then
- nerv.printf("Param [%s] of layer [%s] found in paramRepo.\n", pid, self.id)
+ if pr ~= nil and pr:has_param(pid_g) == true then
+ nerv.info("Param [%s] of layer [%s] found in layer_conf.paramRepo.\n", pid, self.id)
p = pr:get_param(pid_g)
return p
end
- nerv.printf("Param [%s] of layer [%s] is not found in layer_conf or paramRepo, switch to auto-generate.\n", pid, self.id)
+ nerv.info("Param [%s] of layer [%s] is not found in layer_conf or layer_conf.paramRepo, switch to auto-generate.\n", pid, self.id)
p = p_type(pid_g, gconf)
p.trans = gconf.cumat_type(unpack(p_dim))
p.trans:generate(gconf.param_random)
- pr:add(pid_g, p) --add the parameter into the paramRepo
return p
end
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
+nerv.include('tanh.lua')
nerv.include('softmax_ce.lua')
nerv.include('bias.lua')
nerv.include('window.lua')
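
The new lookup order in Layer:find_param is: (1) a parameter object passed directly as layer_conf[pid], (2) the shared repo in layer_conf.pr, keyed as "<layer_id>_<pid>", (3) auto-generation with random init, which is no longer added back into any repo. A sketch of wiring a shared repo through the layer configs, modeled on the tnn_ptb_main.lua hunk above (layer ids and sizes are taken from that example):

-- The same ParamRepo is handed to each layer via "pr" and to the LayerRepo
-- constructor, so find_param can resolve ids like "outputL_<pid>" from it
-- before falling back to auto-generation.
local pr = global_conf.paramRepo
local layers = {
    ["nerv.AffineLayer"] = {
        ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size},
                            ["dim_out"] = {global_conf.vocab:size()},
                            ["pr"] = pr}},
    },
}
local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
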
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
new file mode 100644
index 0000000..e1c32f2
--- /dev/null
+++ b/nerv/layer/tanh.lua
@@ -0,0 +1,35 @@
+local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
+
+function TanhLayer:__init(id, global_conf, layer_conf)
+ self.id = id
+ self.gconf = global_conf
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
+ self:check_dim_len(1, 1)
+end
+
+function TanhLayer:init()
+ if self.dim_in[1] ~= self.dim_out[1] then
+ nerv.error("mismatching dimensions of input and output")
+ end
+end
+
+function TanhLayer:batch_resize(batch_size)
+ -- do nothing
+end
+
+function TanhLayer:update(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function TanhLayer:propagate(input, output)
+ output[1]:tanh(input[1])
+end
+
+function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
+ next_bp_err[1]:tanh_grad(bp_err[1], output[1])
+end
+
+function TanhLayer:get_params()
+ return nerv.ParamRepo({})
+end
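
For context on TanhLayer:back_propagate above: it delegates to the matrix method tanh_grad, which is expected to scale the incoming error by the tanh derivative computed from the forward output, i.e. next_bp_err = bp_err * (1 - output^2) elementwise. A scalar sanity check in plain Lua (assumes Lua 5.1/LuaJIT where math.tanh exists; this is not the nerv matrix API):

-- d/dx tanh(x) = 1 - tanh(x)^2, using the cached forward output y = tanh(x).
local x = 0.5
local y = math.tanh(x)                 -- forward output, as stored in output[1]
local bp_err = 1.0                     -- upstream error for this element
local next_bp_err = bp_err * (1 - y * y)
print(next_bp_err)                     -- ~0.7864, the tanh slope at x = 0.5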