 nerv/examples/lmptb/lm_trainer.lua   | 10
 nerv/examples/lmptb/lmptb/lmutil.lua |  6
 nerv/layer/affine.lua                | 17
 3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
index 2be97c8..7c11a34 100644
--- a/nerv/examples/lmptb/lm_trainer.lua
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -22,6 +22,8 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
local next_log_wcn = global_conf.log_w_num
while (1) do
+ global_conf.timer:tic('most_out_loop_lmprocessfile')
+
local r, feeds
r, feeds = tnn:getFeedFromReader(reader)
@@ -60,12 +62,17 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
end
end
end
+
+ tnn:moveRightToNextMB()
+ global_conf.timer:toc('most_out_loop_lmprocessfile')
+
+ -- print log
if (result["rnn"].cn_w > next_log_wcn) then
next_log_wcn = next_log_wcn + global_conf.log_w_num
printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
for key, value in pairs(global_conf.timer.rec) do
- printf("\t [global_conf.timer]: time spent on %s:%.5fs\n", key, value)
+ printf("\t [global_conf.timer]: time spent on %s:%.5f clock time\n", key, value)
end
global_conf.timer:flush()
nerv.LMUtil.wait(0.1)
@@ -77,7 +84,6 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
end
]]--
- tnn:moveRightToNextMB()
collectgarbage("collect")
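
Note on the change above: the loop now calls tic at the top of each iteration and toc at the bottom, so the accumulated CPU time per iteration lands in timer.rec and is printed with the per-word log. A minimal sketch of that pairing, assuming the Timer class from lmutil.lua below is registered as nerv.Timer ('main_loop' is an illustrative item name, not one used by the trainer):

    local timer = nerv.Timer()
    for i = 1, 10 do
        timer:tic('main_loop')            -- remember os.clock() for this item
        -- ... process one mini-batch ...
        timer:toc('main_loop')            -- accumulate the elapsed CPU time in rec
    end
    for key, value in pairs(timer.rec) do
        print(string.format("time spent on %s: %.5fs", key, value))
    end
    timer:flush()                          -- drop all accumulated entries
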
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 77babef..821aa94 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -124,7 +124,7 @@ function Timer:__init()
end
function Timer:tic(item)
- self.last[item] = os.time()
+ self.last[item] = os.clock()
end
function Timer:toc(item)
@@ -134,11 +134,11 @@ function Timer:toc(item)
if (self.rec[item] == nil) then
self.rec[item] = 0
end
- self.rec[item] = self.rec[item] + os.difftime(os.time(), self.last[item])
+ self.rec[item] = self.rec[item] + os.clock() - self.last[item]
end
function Timer:flush()
for key, value in pairs(self.rec) do
- self.rec[key] = 0
+ self.rec[key] = nil
end
end
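
The os.time-to-os.clock switch changes what the timer measures: os.time returns wall-clock time at one-second resolution, while os.clock returns fractional CPU seconds, which is why the os.difftime call could be replaced by plain subtraction. The flush change is also behavioral: setting entries to nil (rather than 0) removes the keys, so each log interval only prints items that were actually timed since the last flush. A standalone illustration of the two clocks, in plain Lua with no nerv dependency:

    local t0_wall = os.time()               -- wall clock, integer seconds
    local t0_cpu  = os.clock()              -- CPU time, fractional seconds
    local s = 0
    for i = 1, 1e7 do s = s + i end         -- some CPU-bound work
    print(os.difftime(os.time(), t0_wall))  -- often 0: one-second resolution
    print(os.clock() - t0_cpu)              -- e.g. 0.08: measurable CPU time
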
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index a2809bf..0fcff36 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -24,21 +24,21 @@ function MatrixParam:update(gradient)
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n)
+ self.trans:add(self.trans, self.correction, 1.0 - gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / n)
end
function LinearTransParam:update(gradient)
MatrixParam.update(self, gradient)
- local gconf = self.gconf
- -- weight decay
- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
+ -- local gconf = self.gconf
+ -- weight decay (moved into MatrixParam:update)
+ -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
end
function BiasParam:update(gradient)
MatrixParam.update(self, gradient)
- local gconf = self.gconf
+ -- local gconf = self.gconf
-- weight decay
- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
+ -- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost / gconf.batch_size)
end
function AffineLayer:__init(id, global_conf, layer_conf)
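
In the hunk above, weight decay moves out of LinearTransParam:update and BiasParam:update into the shared MatrixParam:update, folded into the scaling coefficient of the single add call. Spelled out on scalars, assuming add(a, b, alpha, beta) computes self = alpha * a + beta * b as its uses here imply (the numbers are made up for illustration):

    local lrate, wcost, batch_size, momentum = 0.1, 1e-4, 32, 0.9
    local mmt_gain = 1.0 / (1.0 - momentum)   -- 10: compensates the momentum sum
    local n = batch_size * mmt_gain           -- 320: effective step normalizer
    local trans, correction = 1.0, 0.5
    -- one fused step: decay shrinks the weight, the momentum term moves it
    trans = (1.0 - lrate * wcost / batch_size) * trans - (lrate / n) * correction
    print(trans)                              -- 0.99984...
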
@@ -76,12 +76,13 @@ function AffineLayer:update(bp_err, input, output)
local gconf = self.gconf
if (gconf.momentum > 0) then
self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
+ self.bp.correction:add(self.bp.correction, bp_err[1]:colsum(), gconf.momentum, 1)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n)
- self.bp.trans:add(self.bp.trans, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost, -gconf.lrate / gconf.batch_size)
+ self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / n)
+ self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / n)
else
self.ltp.trans:mul(input[1], bp_err[1], -gconf.lrate / gconf.batch_size, 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, 'T', 'N')
self.bp.trans:add(self.bp.trans, bp_err[1]:colsum(), 1.0-gconf.lrate*gconf.wcost/gconf.batch_size, -gconf.lrate / gconf.batch_size)
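
With this change, the momentum branch keeps a persistent bp.correction for the bias (accumulated from the colsum of the batch errors) and applies the same decay factor and -lrate/n step to both the weight and bias matrices, matching the non-momentum branch below it. The shared rule, sketched as plain-Lua momentum SGD with weight decay on a single scalar weight (names mirror the gconf fields; values are illustrative):

    local lrate, wcost, batch_size, momentum = 0.1, 1e-4, 32, 0.9
    local w, corr = 0.3, 0.0
    local function step(grad)
        corr = momentum * corr + grad                    -- momentum accumulation
        local n = batch_size * (1.0 / (1.0 - momentum))  -- momentum-gain normalizer
        w = (1.0 - lrate * wcost / batch_size) * w       -- weight decay
            - (lrate / n) * corr                         -- gradient step
    end
    step(0.2)
    print(w)                                             -- w after one update
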