prepare for the replacement of `asr_trainer.lua` with `trainer.lua`

author: Determinant <ted.sybil@gmail.com> 2016-05-08 11:38:28 +0800
committer: Determinant <ted.sybil@gmail.com> 2016-05-08 11:38:28 +0800
commit: 88b3f2a13fa3c01a684259e85ee8298e35f2ac09 (patch)
tree: 1c5ff4e2759ea88f6a9daa5fcafbc07d91951c00
parent: e3ed809bb7d5d11b5b2cec559955b15db18db915 (diff)
6 files changed, 91 insertions, 46 deletions
diff --git a/nerv/examples/trainer.lua b/nerv/examples/trainer.lua
index 783ff1d..8e3efcb 100644
--- a/nerv/examples/trainer.lua
+++ b/nerv/examples/trainer.lua
@@ -1,9 +1,9 @@
 require 'lfs'
 require 'pl'
 
--- =======================================================
--- Deal with command line input & init training envrioment
--- =======================================================
+-- =========================================================
+--  Deal with command line input & init training environment
+-- =========================================================
 
 local function check_and_add_defaults(spec, opts)
     local function get_opt_val(k)
@@ -14,15 +14,14 @@ local function check_and_add_defaults(spec, opts)
     if opt_v then
         nerv.info("resuming from previous training state")
         gconf = dofile(opt_v)
-    else
-        for k, v in pairs(spec) do
-            local opt_v, specified = get_opt_val(k)
-            if (not specified) and gconf[k] ~= nil then
-                nerv.info("using setting in network config file: %s = %s", k, gconf[k])
-            elseif opt_v ~= nil then
-                nerv.info("using setting in options: %s = %s", k, opt_v)
-                gconf[k] = opt_v
-            end
+    end
+    for k, v in pairs(spec) do
+        local opt_v, specified = get_opt_val(k)
+        if (not specified) and gconf[k] ~= nil then
+            nerv.info("using setting in network config file: %s = %s", k, gconf[k])
+        elseif opt_v ~= nil then
+            nerv.info("using setting in options: %s = %s", k, opt_v)
+            gconf[k] = opt_v
         end
     end
 end
@@ -65,6 +64,7 @@ end
 
 local trainer_defaults = {
     lrate = 0.8,
+    hfactor = 0.5,
     batch_size = 256,
     chunk_size = 1,
     buffer_size = 81920,
@@ -125,7 +125,8 @@ end
 
 local date_pattern = "%Y-%m-%d_%H:%M:%S"
 local logfile_name = "log"
-local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern))
+local working_dir = opts["dir"].val or
+                    string.format("nerv_%s", os.date(date_pattern))
 gconf.working_dir = working_dir
 gconf.date_pattern = date_pattern
 
@@ -139,9 +140,9 @@ dir.copyfile(script, working_dir)
 -- set logfile path
 nerv.set_logfile(path.join(working_dir, logfile_name))
 
--- =============
--- main function
--- =============
+-- ============
+--  Main loop
+-- ============
 
 local trainer = gconf.trainer(gconf)
 trainer:training_preprocess()
@@ -160,6 +161,7 @@ for i = gconf.cur_iter, gconf.max_iter do
         local test_err = trainer:process('test', false)
         nerv.info('[TE] testset error %d: %.3f', i, test_err)
     end
-    trainer:halving(train_err, cv_err)
+    trainer:save_params(train_err, cv_err)
 end
+dump_gconf(path.join(working_dir, string.format("iter_%d.meta", gconf.max_iter + 1)))
 trainer:training_afterprocess()
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 03e310d..d3c7cdb 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -11,6 +11,9 @@ function BiasLayer:bind_params()
                                 nerv.BiasParam,
                                 {1, self.dim_out[1]},
                                 nerv.Param.gen_zero)
+    if self.lconf.no_update_all then
+        self.bias.no_update = true
+    end
 end
 
 function BiasLayer:init()
@@ -34,3 +37,9 @@ end
 function BiasLayer:get_params()
     return nerv.ParamRepo({self.bias}, self.loc_type)
 end
+
+function BiasLayer:back_propagate() -- empty body: this method does nothing when called
+end
+
+function BiasLayer:update() -- empty body: this method does nothing when called
+end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 5b5d4c7..f8462f7 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -17,7 +17,7 @@ local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
 -- @param layer_conf a table providing with settings dedicated for the layer,
 -- the following fields should be specified:
 --
--- * `lrepo`: the layer repo that should be used to find the sub-level layers
+-- * `layer_repo`: the layer repo that should be used to find the sub-level layers
 -- * `connections`: an array of 3-tuples describing the connections of
 --   sub-level layers, the structure is as follow:
 --
@@ -33,8 +33,8 @@ local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
 --
 --        <layer_id>[<port_idx>]
 --   where the `<layer_id>` is a string that identifies the layer in
---   `lconf.lrepo`, and `<port_id>` is the input or output port index when used
---   in the first or second port specification respectively.
+--   `layer_conf.layer_repo`, and `<port_idx>` is the input or output port index
+--   when used in the first or second port specification respectively.
 --
 --   The third element in the tuple is an integer specifying the time delay of
 --   this connection. In most cases, it will be simply zero. But for an
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 729ab58..fb74b14 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -11,6 +11,9 @@ function WindowLayer:bind_params()
                                 nerv.BiasParam,
                                 {1, self.dim_out[1]},
                                 nerv.Param.gen_zero)
+    if self.lconf.no_update_all then
+        self.window.no_update = true
+    end
 end
 
 function WindowLayer:init()
@@ -34,3 +37,9 @@ end
 function WindowLayer:get_params()
     return nerv.ParamRepo({self.window}, self.loc_type)
 end
+
+function WindowLayer:back_propagate() -- empty body: this method does nothing when called
+end
+
+function WindowLayer:update() -- empty body: this method does nothing when called
+end
diff --git a/nerv/nerv b/nerv/nerv
index 4c20ec7..1b32a4e 100644
--- a/nerv/nerv
+++ b/nerv/nerv
@@ -10,7 +10,7 @@ local function print_help()
     nerv.print_usage(options)
 end
 
-nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n")
+nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (beta) ***\n")
 arg, opts = nerv.parse_args(arg, options)
 if #arg < 1 or opts["help"].val then
     print_help()
diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua
index 4ae08d9..44390ea 100644
--- a/nerv/nn/trainer.lua
+++ b/nerv/nn/trainer.lua
@@ -1,8 +1,8 @@
 local trainer = nerv.class('nerv.Trainer')
 
 function trainer:__init(gconf)
-    self.gconf = gconf
     local mat_type
+    self.gconf = gconf
     self.src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
     local src_loc_type = self.src_loc_type
     if gconf.use_cpu then
@@ -13,16 +13,19 @@ function trainer:__init(gconf)
         self.train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
     end
     local train_loc_type = self.train_loc_type
-
     local host_param_repo = nerv.ParamRepo()
+    -- import the parameters from chunk files
     host_param_repo:import(gconf.initialized_param, gconf)
     local param_repo = host_param_repo:copy(train_loc_type, gconf)
+    -- create layers and establish initial bindings
     self.layer_repo = self:make_layer_repo(param_repo)
     local layer_repo = self.layer_repo
+    -- compile the network to be trained
     local graph = self:get_network(layer_repo)
     self.input_order = self:get_input_order()
-
-    self.network = nerv.Network('network', gconf, {network = graph, clip = gconf.clip})
+    self.network = nerv.Network('network', gconf,
+                                {network = graph,
+                                 clip = gconf.clip})
     local network = self.network
     network:init(gconf.batch_size, gconf.chunk_size)
 
@@ -31,9 +34,9 @@ function trainer:__init(gconf)
     local err_output = self.err_output
     for i = 1, #dim_in do
         err_output[i] = {}
-        local tmp = mat_type(gconf.batch_size, dim_in[i])
+        local dummy = mat_type(gconf.batch_size, dim_in[i])
         for t = 1, gconf.chunk_size do
-            err_output[i][t] = tmp
+            table.insert(err_output[i], dummy)
         end
     end
     self.output = {}
@@ -43,16 +46,19 @@ function trainer:__init(gconf)
     for i = 1, #dim_out do
         output[i] = {}
         for t = 1, gconf.chunk_size do
-            output[i][t] = mat_type(gconf.batch_size, dim_out[i])
+            table.insert(output[i], mat_type(gconf.batch_size, dim_out[i]))
         end
         err_input[i] = {}
-        local tmp = mat_type(gconf.batch_size, dim_out[i])
-        tmp:fill(0)
+        if dim_out[i] ~= 1 then
+            nerv.warning("the output has multiple heads, the default " ..
+                        "`err_input` will be zero")
+        end
         for t = 1, gconf.chunk_size do
             if dim_out[i] == 1 then
-                err_input[i][t] = gconf.mask[t]
+                table.insert(err_input[i], gconf.mask[t])
             else
-                err_input[i][t] = tmp
+                table.insert(err_input[i], mat_type(gconf.batch_size, dim_out[i]))
+                err_input[i][t]:fill(0)
             end
         end
     end
@@ -89,15 +95,16 @@ function trainer:process(dataset, do_train)
     local err_output = self.err_output
     network:epoch_init()
 
-    while true do
-        local data = buffer:get_data()
-        if data == nil then
-            break
-        end
-
+    for data in buffer.get_data, buffer do
         cnt = cnt + 1
-        local info = {input = {}, output = output, err_input = err_input, err_output = err_output,
-            do_train = do_train, seq_length = data.seq_length, new_seq = data.new_seq}
+        local info = {input = {},
+                      output = output,
+                      err_input = err_input,
+                      err_output = err_output,
+                      do_train = do_train,
+                      seq_length = data.seq_length,
+                      new_seq = data.new_seq}
+
         for i = 1, #network.dim_in do
             info.input[i] = data.data[input_order[i]]
         end
@@ -105,7 +112,7 @@ function trainer:process(dataset, do_train)
         self:mini_batch_preprocess(cnt, info)
         network:mini_batch_init(info)
         network:propagate()
-        self:mini_batch_middleprocess(cnt, info)
+        self:mini_batch_inprocess(cnt, info)
         if do_train then
             network:back_propagate()
             network:update()
@@ -119,18 +126,31 @@ function trainer:process(dataset, do_train)
     return self:get_error()
 end
 
-function trainer:halving(train_err, cv_err)
+function trainer:if_accept(cv_err) -- true iff `cv_err` is below the best CV error recorded in the config
+    return cv_err < self.gconf.best_cv -- read the trainer's own config, not a global `gconf`
+end
+
+function trainer:do_halving() -- scales the learning rate by `hfactor`, both taken from the trainer's config
+    self.gconf.lrate = self.gconf.lrate * self.gconf.hfactor -- use `self.gconf`, not a global `gconf`
+end
+
+function trainer:save_params(train_err, cv_err)
     local gconf = self.gconf
     local src_loc_type = self.src_loc_type
     local train_loc_type = self.train_loc_type
     local layer_repo = self.layer_repo
-    local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv', os.date(gconf.date_pattern), gconf.cur_iter, gconf.lrate, train_err, cv_err)
+    local param_fname = string.format('%s_iter_%d_lr%f_tr%.3f_cv%.3f.nerv',
+                                      os.date(gconf.date_pattern),
+                                      gconf.cur_iter,
+                                      gconf.lrate,
+                                      train_err,
+                                      cv_err)
     param_fname = path.join(gconf.working_dir, param_fname)
     local network = self.network
     local host_param_repo = network:get_params():copy(src_loc_type, gconf)
     host_param_repo:export(param_fname)
 
-    if cv_err < gconf.best_cv then
+    if self:if_accept(cv_err) then
         nerv.info("accepting the trained params")
         gconf.best_cv = cv_err
         gconf.initialized_param = {param_fname}
@@ -140,8 +160,9 @@ function trainer:halving(train_err, cv_err)
         host_param_repo = nerv.ParamRepo()
         host_param_repo:import(gconf.initialized_param, gconf)
         local param_repo = host_param_repo:copy(train_loc_type, gconf)
+        -- rebind the parameters
         layer_repo:rebind(param_repo)
-        gconf.lrate = gconf.lrate * 0.5
+        self:do_halving()
     end
 end
 
@@ -160,7 +181,7 @@ end
 function trainer:mini_batch_preprocess(cnt, info)
 end
 
-function trainer:mini_batch_middleprocess(cnt, info)
+function trainer:mini_batch_inprocess(cnt, info)
 end
 
 function trainer:mini_batch_afterprocess(cnt, info)
@@ -181,3 +202,7 @@ end
 function trainer:get_input_order()
     nerv.error_method_not_implemented()
 end
+
+function trainer:get_error() -- hook whose result `trainer:process` returns; this base version only calls the helper below
+    nerv.error_method_not_implemented()
+end
author	Determinant <ted.sybil@gmail.com>	2016-05-08 11:38:28 +0800
committer	Determinant <ted.sybil@gmail.com>	2016-05-08 11:38:28 +0800
commit	88b3f2a13fa3c01a684259e85ee8298e35f2ac09 (patch)
tree	1c5ff4e2759ea88f6a9daa5fcafbc07d91951c00
parent	e3ed809bb7d5d11b5b2cec559955b15db18db915 (diff)