diff options
-rw-r--r-- | nerv/examples/trainer.lua | 36 | ||||
-rw-r--r-- | nerv/layer/bias.lua | 9 | ||||
-rw-r--r-- | nerv/layer/graph.lua | 6 | ||||
-rw-r--r-- | nerv/layer/window.lua | 9 | ||||
-rw-r--r-- | nerv/nerv | 2 | ||||
-rw-r--r-- | nerv/nn/trainer.lua | 75 |
6 files changed, 91 insertions, 46 deletions
diff --git a/nerv/examples/trainer.lua b/nerv/examples/trainer.lua index 783ff1d..8e3efcb 100644 --- a/nerv/examples/trainer.lua +++ b/nerv/examples/trainer.lua @@ -1,9 +1,9 @@ require 'lfs' require 'pl' --- ======================================================= --- Deal with command line input & init training envrioment --- ======================================================= +-- ========================================================= +-- Deal with command line input & init training envrioment +-- ========================================================= local function check_and_add_defaults(spec, opts) local function get_opt_val(k) @@ -14,15 +14,14 @@ local function check_and_add_defaults(spec, opts) if opt_v then nerv.info("resuming from previous training state") gconf = dofile(opt_v) - else - for k, v in pairs(spec) do - local opt_v, specified = get_opt_val(k) - if (not specified) and gconf[k] ~= nil then - nerv.info("using setting in network config file: %s = %s", k, gconf[k]) - elseif opt_v ~= nil then - nerv.info("using setting in options: %s = %s", k, opt_v) - gconf[k] = opt_v - end + end + for k, v in pairs(spec) do + local opt_v, specified = get_opt_val(k) + if (not specified) and gconf[k] ~= nil then + nerv.info("using setting in network config file: %s = %s", k, gconf[k]) + elseif opt_v ~= nil then + nerv.info("using setting in options: %s = %s", k, opt_v) + gconf[k] = opt_v end end end @@ -65,6 +64,7 @@ end local trainer_defaults = { lrate = 0.8, + hfactor = 0.5, batch_size = 256, chunk_size = 1, buffer_size = 81920, @@ -125,7 +125,8 @@ end local date_pattern = "%Y-%m-%d_%H:%M:%S" local logfile_name = "log" -local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern)) +local working_dir = opts["dir"].val or + string.format("nerv_%s", os.date(date_pattern)) gconf.working_dir = working_dir gconf.date_pattern = date_pattern @@ -139,9 +140,9 @@ dir.copyfile(script, working_dir) -- set logfile path nerv.set_logfile(path.join(working_dir, logfile_name)) --- ============= --- main function --- ============= +-- ============ +-- Main loop +-- ============ local trainer = gconf.trainer(gconf) trainer:training_preprocess() @@ -160,6 +161,7 @@ for i = gconf.cur_iter, gconf.max_iter do local test_err = trainer:process('test', false) nerv.info('[TE] testset error %d: %.3f', i, test_err) end - trainer:halving(train_err, cv_err) + trainer:save_params(train_err, cv_err) end +dump_gconf(path.join(working_dir, string.format("iter_%d.meta", gconf.max_iter + 1))) trainer:training_afterprocess() diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index 03e310d..d3c7cdb 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -11,6 +11,9 @@ function BiasLayer:bind_params() nerv.BiasParam, {1, self.dim_out[1]}, nerv.Param.gen_zero) + if self.lconf.no_update_all then + self.bias.no_update = true + end end function BiasLayer:init() @@ -34,3 +37,9 @@ end function BiasLayer:get_params() return nerv.ParamRepo({self.bias}, self.loc_type) end + +function BiasLayer:back_propagate() +end + +function BiasLayer:update() +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 5b5d4c7..f8462f7 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -17,7 +17,7 @@ local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') -- @param layer_conf a table providing with settings dedicated for the layer, -- the following fields should be specified: -- --- * `lrepo`: the layer repo that should be used to find the sub-level layers +-- * `layer_repo`: the layer repo that should be used to find the sub-level layers -- * `connections`: an array of 3-tuples describing the connections of -- sub-level layers, the structure is as follow: -- @@ -33,8 +33,8 @@ local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') -- -- <layer_id>[<port_idx>] -- where the `<layer_id>` is a string that identifies the layer in --- `lconf.lrepo`, and `<port_id>` is the input or output port index when used --- in the first or second port specification respectively. +-- `layer_conf.layer_repo`, and `<port_id>` is the input or output port index +-- when used in the first or second port specification respectively. -- -- The third element in the tuple is an integer specifying the time delay of -- this connection. In most cases, it will be simply zero. But for an diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 729ab58..fb74b14 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -11,6 +11,9 @@ function WindowLayer:bind_params() nerv.BiasParam, {1, self.dim_out[1]}, nerv.Param.gen_zero) + if self.lconf.no_update_all then + self.window.no_update = true + end end function WindowLayer:init() @@ -34,3 +37,9 @@ end function WindowLayer:get_params() return nerv.ParamRepo({self.window}, self.loc_type) end + +function WindowLayer:back_propagate() +end + +function WindowLayer:update() +end @@ -10,7 +10,7 @@ local function print_help() nerv.print_usage(options) end -nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n") +nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (beta) ***\n") arg, opts = nerv.parse_args(arg, options) if #arg < 1 or opts["help"].val then print_help() diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua index 4ae08d9..44390ea 100644 --- a/nerv/nn/trainer.lua +++ b/nerv/nn/trainer.lua @@ -1,8 +1,8 @@ local trainer = nerv.class('nerv.Trainer') function trainer:__init(gconf) - self.gconf = gconf local mat_type + self.gconf = gconf self.src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST local src_loc_type = self.src_loc_type if gconf.use_cpu then @@ -13,16 +13,19 @@ function trainer:__init(gconf) self.train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE end local train_loc_type = self.train_loc_type - local host_param_repo = nerv.ParamRepo() + -- import the parameters from chunk files host_param_repo:import(gconf.initialized_param, gconf) local param_repo = host_param_repo:copy(train_loc_type, gconf) + -- create layers and establish initial bindings self.layer_repo = self:make_layer_repo(param_repo) local layer_repo = self.layer_repo + -- compile the network to be trained local graph = self:get_network(layer_repo) self.input_order = self:get_input_order() - - self.network = nerv.Network('network', gconf, {network = graph, clip = gconf.clip}) + self.network = nerv.Network('network', gconf, + {network = graph, + clip = gconf.clip}) local network = self.network network:init(gconf.batch_size, gconf.chunk_size) @@ -31,9 +34,9 @@ function trainer:__init(gconf) local err_output = self.err_output for i = 1, #dim_in do err_output[i] = {} - local tmp = mat_type(gconf.batch_size, dim_in[i]) + local dummy = mat_type(gconf.batch_size, dim_in[i]) for t = 1, gconf.chunk_size do - err_output[i][t] = tmp + table.insert(err_output[i], dummy) end end self.output = {} @@ -43,16 +46,19 @@ function trainer:__init(gconf) for i = 1, #dim_out do output[i] = {} for t = 1, gconf.chunk_size do - output[i][t] = mat_type(gconf.batch_size, dim_out[i]) + table.insert(output[i], mat_type(gconf.batch_size, dim_out[i])) end err_input[i] = {} - local tmp = mat_type(gconf.batch_size, dim_out[i]) - tmp:fill(0) + if dim_out[i] ~= 1 then + nerv.warning("the output has multiple heads, the default " .. + "`err_input` will be zero") + end for t = 1, gconf.chunk_size do if dim_out[i] == 1 then - err_input[i][t] = gconf.mask[t] + table.insert(err_input[i], gconf.mask[t]) else - err_input[i][t] = tmp + table.insert(err_input[i], mat_type(gconf.batch_size, dim_out[i])) + err_input[i][t]:fill(0) end end end @@ -89,15 +95,16 @@ function trainer:process(dataset, do_train) local err_output = self.err_output network:epoch_init() - while true do - local data = buffer:get_data() - if data == nil then - break - end - + for data in buffer.get_data, buffer do cnt = cnt + 1 - local info = {input = {}, output = output, err_input = err_input, err_output = err_output, - do_train = do_train, seq_length = data.seq_length, new_seq = data.new_seq} + local info = {input = {}, + output = output, + err_input = err_input, + err_output = err_output, + do_train = do_train, + seq_length = data.seq_length, + new_seq = data.new_seq} + for i = 1, #network.dim_in do info.input[i] = data.data[input_order[i]] end @@ -105,7 +112,7 @@ function trainer:process(dataset, do_train) self:mini_batch_preprocess(cnt, info) network:mini_batch_init(info) network:propagate() - self:mini_batch_middleprocess(cnt, info) + self:mini_batch_inprocess(cnt, info) if do_train then network:back_propagate() network:update() @@ -119,18 +126,31 @@ function trainer:process(dataset, do_train) return self:get_error() end -function trainer:halving(train_err, cv_err) +function trainer:if_accept(cv_err) + return cv_err < gconf.best_cv +end + +function trainer:do_halving() + gconf.lrate = gconf.lrate * gconf.hfactor +end + +function trainer:save_params(train_err, cv_err) local gconf = self.gconf local src_loc_type = self.src_loc_type local train_loc_type = self.train_loc_type local layer_repo = self.layer_repo - local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv', os.date(gconf.date_pattern), gconf.cur_iter, gconf.lrate, train_err, cv_err) + local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv', + os.date(gconf.date_pattern), + gconf.cur_iter, + gconf.lrate, + train_err, + cv_err) param_fname = path.join(gconf.working_dir, param_fname) local network = self.network local host_param_repo = network:get_params():copy(src_loc_type, gconf) host_param_repo:export(param_fname) - if cv_err < gconf.best_cv then + if self:if_accept(cv_err) then nerv.info("accepting the trained params") gconf.best_cv = cv_err gconf.initialized_param = {param_fname} @@ -140,8 +160,9 @@ function trainer:halving(train_err, cv_err) host_param_repo = nerv.ParamRepo() host_param_repo:import(gconf.initialized_param, gconf) local param_repo = host_param_repo:copy(train_loc_type, gconf) + -- rebind the parameters layer_repo:rebind(param_repo) - gconf.lrate = gconf.lrate * 0.5 + self:do_halving() end end @@ -160,7 +181,7 @@ end function trainer:mini_batch_preprocess(cnt, info) end -function trainer:mini_batch_middleprocess(cnt, info) +function trainer:mini_batch_inprocess(cnt, info) end function trainer:mini_batch_afterprocess(cnt, info) @@ -181,3 +202,7 @@ end function trainer:get_input_order() nerv.error_method_not_implemented() end + +function trainer:get_error() + nerv.error_method_not_implemented() +end |