about | summary | refs | log | tree | commit | diff
path: root/nerv/nn/trainer.lua
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/nn/trainer.lua')
-rw-r--r-- nerv/nn/trainer.lua 75
1 files changed, 50 insertions, 25 deletions
diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua
index 4ae08d9..44390ea 100644
--- a/nerv/nn/trainer.lua
+++ b/nerv/nn/trainer.lua
@@ -1,8 +1,8 @@
local trainer = nerv.class('nerv.Trainer')
function trainer:__init(gconf)
- self.gconf = gconf
local mat_type
+ self.gconf = gconf
self.src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
local src_loc_type = self.src_loc_type
if gconf.use_cpu then
@@ -13,16 +13,19 @@ function trainer:__init(gconf)
self.train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
local train_loc_type = self.train_loc_type
-
local host_param_repo = nerv.ParamRepo()
+ -- import the parameters from chunk files
host_param_repo:import(gconf.initialized_param, gconf)
local param_repo = host_param_repo:copy(train_loc_type, gconf)
+ -- create layers and establish initial bindings
self.layer_repo = self:make_layer_repo(param_repo)
local layer_repo = self.layer_repo
+ -- compile the network to be trained
local graph = self:get_network(layer_repo)
self.input_order = self:get_input_order()
-
- self.network = nerv.Network('network', gconf, {network = graph, clip = gconf.clip})
+ self.network = nerv.Network('network', gconf,
+ {network = graph,
+ clip = gconf.clip})
local network = self.network
network:init(gconf.batch_size, gconf.chunk_size)
@@ -31,9 +34,9 @@ function trainer:__init(gconf)
local err_output = self.err_output
for i = 1, #dim_in do
err_output[i] = {}
- local tmp = mat_type(gconf.batch_size, dim_in[i])
+ local dummy = mat_type(gconf.batch_size, dim_in[i])
for t = 1, gconf.chunk_size do
- err_output[i][t] = tmp
+ table.insert(err_output[i], dummy)
end
end
self.output = {}
@@ -43,16 +46,19 @@ function trainer:__init(gconf)
for i = 1, #dim_out do
output[i] = {}
for t = 1, gconf.chunk_size do
- output[i][t] = mat_type(gconf.batch_size, dim_out[i])
+ table.insert(output[i], mat_type(gconf.batch_size, dim_out[i]))
end
err_input[i] = {}
- local tmp = mat_type(gconf.batch_size, dim_out[i])
- tmp:fill(0)
+ if dim_out[i] ~= 1 then
+ nerv.warning("the output has multiple heads, the default " ..
+ "`err_input` will be zero")
+ end
for t = 1, gconf.chunk_size do
if dim_out[i] == 1 then
- err_input[i][t] = gconf.mask[t]
+ table.insert(err_input[i], gconf.mask[t])
else
- err_input[i][t] = tmp
+ table.insert(err_input[i], mat_type(gconf.batch_size, dim_out[i]))
+ err_input[i][t]:fill(0)
end
end
end
@@ -89,15 +95,16 @@ function trainer:process(dataset, do_train)
local err_output = self.err_output
network:epoch_init()
- while true do
- local data = buffer:get_data()
- if data == nil then
- break
- end
-
+ for data in buffer.get_data, buffer do
cnt = cnt + 1
- local info = {input = {}, output = output, err_input = err_input, err_output = err_output,
- do_train = do_train, seq_length = data.seq_length, new_seq = data.new_seq}
+ local info = {input = {},
+ output = output,
+ err_input = err_input,
+ err_output = err_output,
+ do_train = do_train,
+ seq_length = data.seq_length,
+ new_seq = data.new_seq}
+
for i = 1, #network.dim_in do
info.input[i] = data.data[input_order[i]]
end
@@ -105,7 +112,7 @@ function trainer:process(dataset, do_train)
self:mini_batch_preprocess(cnt, info)
network:mini_batch_init(info)
network:propagate()
- self:mini_batch_middleprocess(cnt, info)
+ self:mini_batch_inprocess(cnt, info)
if do_train then
network:back_propagate()
network:update()
@@ -119,18 +126,31 @@ function trainer:process(dataset, do_train)
return self:get_error()
end
-function trainer:halving(train_err, cv_err)
+function trainer:if_accept(cv_err) -- true iff `cv_err` is strictly below the best CV error kept in the trainer's config
+    return cv_err < self.gconf.best_cv -- read self.gconf (set in __init) instead of an implicit global `gconf`
+end
+
+function trainer:do_halving() -- multiply the learning rate in the trainer's config by that config's `hfactor`
+    self.gconf.lrate = self.gconf.lrate * self.gconf.hfactor -- use self.gconf (set in __init), not an implicit global `gconf`
+end
+
+function trainer:save_params(train_err, cv_err)
local gconf = self.gconf
local src_loc_type = self.src_loc_type
local train_loc_type = self.train_loc_type
local layer_repo = self.layer_repo
- local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv', os.date(gconf.date_pattern), gconf.cur_iter, gconf.lrate, train_err, cv_err)
+ local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv',
+ os.date(gconf.date_pattern),
+ gconf.cur_iter,
+ gconf.lrate,
+ train_err,
+ cv_err)
param_fname = path.join(gconf.working_dir, param_fname)
local network = self.network
local host_param_repo = network:get_params():copy(src_loc_type, gconf)
host_param_repo:export(param_fname)
- if cv_err < gconf.best_cv then
+ if self:if_accept(cv_err) then
nerv.info("accepting the trained params")
gconf.best_cv = cv_err
gconf.initialized_param = {param_fname}
@@ -140,8 +160,9 @@ function trainer:halving(train_err, cv_err)
host_param_repo = nerv.ParamRepo()
host_param_repo:import(gconf.initialized_param, gconf)
local param_repo = host_param_repo:copy(train_loc_type, gconf)
+ -- rebind the parameters
layer_repo:rebind(param_repo)
- gconf.lrate = gconf.lrate * 0.5
+ self:do_halving()
end
end
@@ -160,7 +181,7 @@ end
function trainer:mini_batch_preprocess(cnt, info)
end
-function trainer:mini_batch_middleprocess(cnt, info)
+function trainer:mini_batch_inprocess(cnt, info)
end
function trainer:mini_batch_afterprocess(cnt, info)
@@ -181,3 +202,7 @@ end
function trainer:get_input_order()
nerv.error_method_not_implemented()
end
+
+function trainer:get_error() -- trainer:process returns this method's result; the base version only calls the helper below
+ nerv.error_method_not_implemented() -- presumably signals "must be overridden" -- TODO confirm against the nerv helper
+end