From a32195e3e2ae9ca0f0c7a82e73e6bddb64568c05 Mon Sep 17 00:00:00 2001
From: Determinant <ted.sybil@gmail.com>
Date: Thu, 10 Mar 2016 13:40:11 +0800
Subject: major change: clearer param binding semantics; permit rebinding;
 enable resuming from previous training

---
 nerv/examples/asr_trainer.lua        | 183 +++++++++++++++++++++++------------
 nerv/examples/swb_baseline.lua       |  77 ++++++++-------
 nerv/examples/swb_baseline2.lua      |  77 ++++++++-------
 nerv/examples/swb_baseline_basic.lua | 162 -------------------------------
 nerv/examples/timit_baseline2.lua    |  64 ++++++------
 nerv/init.lua                        |  75 +++++++-------
 nerv/layer/affine.lua                |  43 ++++----
 nerv/layer/bias.lua                  |  15 +--
 nerv/layer/combiner.lua              |  16 ++-
 nerv/layer/dropout.lua               |  16 ++-
 nerv/layer/elem_mul.lua              |  11 ++-
 nerv/layer/gru.lua                   |  20 ++--
 nerv/layer/init.lua                  |  60 ++++++++----
 nerv/layer/lstm.lua                  |  20 ++--
 nerv/layer/lstm_gate.lua             |  17 ++--
 nerv/layer/mse.lua                   |  16 ++-
 nerv/layer/sigmoid.lua               |  11 ++-
 nerv/layer/softmax.lua               |  11 ++-
 nerv/layer/softmax_ce.lua            |  16 ++-
 nerv/layer/tanh.lua                  |  11 ++-
 nerv/layer/window.lua                |  15 +--
 nerv/matrix/init.lua                 |  21 +++-
 nerv/nerv                            |   4 +
 nerv/nn/layer_dag.lua                |  16 ++-
 nerv/nn/layer_repo.lua               |  30 +++---
 nerv/nn/param_repo.lua               |  59 ++++++++++-
 26 files changed, 526 insertions(+), 540 deletions(-)
 delete mode 100644 nerv/examples/swb_baseline_basic.lua

diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 5001e12..5bf28bd 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -1,19 +1,33 @@
 require 'lfs'
 require 'pl'
 local function build_trainer(ifname)
-    local param_repo = nerv.ParamRepo()
-    param_repo:import(ifname, nil, gconf)
-    local layer_repo = make_layer_repo(param_repo)
-    local network = get_network(layer_repo)
-    local global_transf = get_global_transf(layer_repo)
-    local input_order = get_input_order()
+    local host_param_repo = nerv.ParamRepo()
     local mat_type
+    local src_loc_type
+    local train_loc_type
+    host_param_repo:import(ifname, nil, gconf)
     if gconf.use_cpu then
         mat_type = gconf.mmat_type
+        src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+        train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
     else
         mat_type = gconf.cumat_type
+        src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+        train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
     end
-    local iterative_trainer = function (prefix, scp_file, bp)
+    local param_repo = host_param_repo:copy(train_loc_type)
+    local layer_repo = make_layer_repo(param_repo)
+    local network = get_network(layer_repo)
+    local global_transf = get_global_transf(layer_repo)
+    local input_order = get_input_order()
+    local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo)
+        -- rebind the params if necessary
+        if rebind_param_repo then
+            host_param_repo = rebind_param_repo
+            param_repo = host_param_repo:copy(train_loc_type)
+            layer_repo:rebind(param_repo)
+            rebind_param_repo = nil
+        end
         gconf.randomize = bp
         -- build buffer
         local buffer = make_buffer(make_readers(scp_file, layer_repo))
@@ -66,20 +80,38 @@ local function build_trainer(ifname)
         print_stat(layer_repo)
         mat_type.print_profile()
         mat_type.clear_profile()
-        if (not bp) and prefix ~= nil then
-            nerv.info("writing back...")
-            local fname = string.format("%s_cv%.3f.nerv",
-                            prefix, get_accuracy(layer_repo))
-            network:get_params():export(fname, nil)
+        local fname
+        if (not bp) then
+            host_param_repo = param_repo:copy(src_loc_type)
+            if prefix ~= nil then
+                nerv.info("writing back...")
+                fname = string.format("%s_cv%.3f.nerv",
+                                    prefix, get_accuracy(layer_repo))
+                host_param_repo:export(fname, nil)
+            end
         end
-        return get_accuracy(layer_repo)
+        return get_accuracy(layer_repo), host_param_repo, fname
     end
     return iterative_trainer
 end
 
-local function check_and_add_defaults(spec)
-    for k, v in pairs(spec) do
-        gconf[k] = opts[string.gsub(k, '_', '-')].val or gconf[k] or v
+local function check_and_add_defaults(spec, opts)
+    local function get_opt_val(k)
+        return opts[string.gsub(k, '_', '-')].val
+    end
+    local opt_v = get_opt_val("resume_from")
+    if opt_v then
+        gconf = dofile(opt_v)
+    else
+        for k, v in pairs(spec) do
+            local opt_v = get_opt_val(k)
+            if opt_v ~= nil then
+                gconf[k] = opt_v
+            elseif gconf[k] ~= nil then
+            elseif v ~= nil then
+                gconf[k] = v
+            end
+        end
     end
 end
 
@@ -112,6 +144,13 @@ local function print_gconf()
     end
 end
 
+local function dump_gconf(fname) -- write gconf to fname as a "return <table>" chunk
+    local f = assert(io.open(fname, "w")) -- raise the OS error instead of indexing nil
+    f:write("return ")
+    f:write(table.tostring(gconf))
+    f:close()
+end
+
 local trainer_defaults = {
     lrate = 0.8,
     batch_size = 256,
@@ -121,22 +160,26 @@ local trainer_defaults = {
     start_halving_inc = 0.5,
     halving_factor = 0.6,
     end_halving_inc = 0.1,
+    cur_iter = 1,
     min_iter = 1,
     max_iter = 20,
     min_halving = 5,
     do_halving = false,
-    tr_scp = nil,
-    cv_scp = nil,
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    debug = false
+    cumat_tname = "nerv.CuMatrixFloat",
+    mmat_tname = "nerv.MMatrixFloat",
+    debug = false,
 }
 
 local options = make_options(trainer_defaults)
-table.insert(options, {"help", "h", "boolean",
-                        default = false, desc = "show this help information"})
-table.insert(options, {"dir", nil, "string",
-                        default = nil, desc = "specify the working directory"})
+local extra_opt_spec = {
+    {"tr-scp", nil, "string"},
+    {"cv-scp", nil, "string"},
+    {"resume-from", nil, "string"},
+    {"help", "h", "boolean", default = false, desc = "show this help information"},
+    {"dir", nil, "string", desc = "specify the working directory"},
+}
+
+table.extend(options, extra_opt_spec)
 
 arg, opts = nerv.parse_args(arg, options)
 
@@ -155,14 +198,16 @@ Note: config key like aaa_bbbb_cc could be overriden by specifying
 
 ]]--
 
-check_and_add_defaults(trainer_defaults)
+check_and_add_defaults(trainer_defaults, opts)
+gconf.mmat_type = nerv.get_type(gconf.mmat_tname)
+gconf.cumat_type = nerv.get_type(gconf.cumat_tname)
+gconf.use_cpu = econf.use_cpu or false
 
 local pf0 = gconf.initialized_param
-local trainer = build_trainer(pf0)
-local accu_best = trainer(nil, gconf.cv_scp, false)
 local date_pattern = "%Y%m%d%H%M%S"
 local logfile_name = "log"
 local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern))
+local rebind_param_repo = nil
 
 print_gconf()
 if not lfs.mkdir(working_dir) then
@@ -173,37 +218,55 @@ dir.copyfile(arg[1], working_dir)
 -- set logfile path
 nerv.set_logfile(path.join(working_dir, logfile_name))
 path.chdir(working_dir)
-nerv.info("initial cross validation: %.3f", accu_best)
-for i = 1, gconf.max_iter do
-    nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
-    local accu_tr = trainer(nil, gconf.tr_scp, true)
-    nerv.info("[TR] training set %d: %.3f", i, accu_tr)
-    local accu_new = trainer(
-                        string.format("%s_%s_iter_%d_lr%f_tr%.3f",
-                            string.gsub(
-                                (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
-                                "(.*)%..*", "%1"),
-                            os.date(date_pattern),
-                            i, gconf.lrate,
-                            accu_tr),
-                        gconf.cv_scp, false)
-    nerv.info("[CV] cross validation %d: %.3f", i, accu_new)
-    -- TODO: revert the weights
-    local accu_diff = accu_new - accu_best
-    if gconf.do_halving and
-        accu_diff < gconf.end_halving_inc and
-        i > gconf.min_iter then
-        break
-    end
-    if accu_diff < gconf.start_halving_inc and
-        i >= gconf.min_halving then
-        gconf.do_halving = true
-    end
-    if gconf.do_halving then
-        gconf.lrate = gconf.lrate * gconf.halving_factor
-    end
-    if accu_new > accu_best then
-        accu_best = accu_new
-    end
+
+-- start the training
+local trainer = build_trainer(pf0)
+local pr_prev
+gconf.accu_best, pr_prev = trainer(nil, gconf.cv_scp, false)
+nerv.info("initial cross validation: %.3f", gconf.accu_best)
+for i = gconf.cur_iter, gconf.max_iter do
+    local stop = false
+    gconf.cur_iter = i
+    dump_gconf(string.format("iter_%d.meta", i)) -- snapshot gconf before this iteration runs
+    repeat -- trick to implement `continue` statement
+        nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
+        local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo)
+        nerv.info("[TR] training set %d: %.3f", i, accu_tr)
+        local param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f",
+                                string.gsub(
+                                    (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
+                                    "(.*)%..*", "%1"),
+                                os.date(date_pattern),
+                                i, gconf.lrate,
+                                accu_tr)
+        local accu_new, pr_new, param_fname = trainer(param_prefix, gconf.cv_scp, false)
+        nerv.info("[CV] cross validation %d: %.3f", i, accu_new)
+        local accu_prev = gconf.accu_best
+        if accu_new < gconf.accu_best then
+            nerv.info("rejecting the trained params, rollback to the previous one")
+            file.move(param_fname, param_fname .. ".rejected")
+            rebind_param_repo = pr_prev -- next training pass restarts from the last accepted params
+            break -- `continue` equivalent
+        else
+            nerv.info("accepting the trained params")
+            gconf.accu_best = accu_new
+            pr_prev, rebind_param_repo = pr_new, nil -- clear stale rollback so accepted params are kept
+            gconf.initialized_param = {path.join(path.currentdir(), param_fname)}
+        end
+        if gconf.do_halving and
+            gconf.accu_best - accu_prev < gconf.end_halving_inc and
+            i > gconf.min_iter then
+            stop = true
+            break
+        end
+        if gconf.accu_best - accu_prev < gconf.start_halving_inc and
+            i >= gconf.min_halving then
+            gconf.do_halving = true
+        end
+        if gconf.do_halving then
+            gconf.lrate = gconf.lrate * gconf.halving_factor
+        end
+    until true
+    if stop then break end
 --    nerv.Matrix.print_profile()
 end
diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua
index 4cb2389..0ce8468 100644
--- a/nerv/examples/swb_baseline.lua
+++ b/nerv/examples/swb_baseline.lua
@@ -7,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
         cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
         htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
         initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
-                            "/slfs1/users/mfy43/swb_global_transf.nerv"},
-        debug = false}
+                            "/slfs1/users/mfy43/swb_global_transf.nerv"}}
 
 function make_layer_repo(param_repo)
     local layer_repo = nerv.LayerRepo(
@@ -16,51 +15,51 @@ function make_layer_repo(param_repo)
         -- global transf
         ["nerv.BiasLayer"] =
         {
-            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
-            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+            blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}},
+            blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}}
         },
         ["nerv.WindowLayer"] =
         {
-            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
-            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+            wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}},
+            wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}}
         },
         -- biased linearity
         ["nerv.AffineLayer"] =
         {
-            affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
-            {dim_in = {429}, dim_out = {2048}}},
-            affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
-            {dim_in = {2048}, dim_out = {3001}}}
+            affine0 = {dim_in = {429}, dim_out = {2048},
+                        params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+            affine1 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+            affine2 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+            affine3 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+            affine4 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+            affine5 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+            affine6 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine6_ltp", bp = "affine6_bp"}},
+            affine7 = {dim_in = {2048}, dim_out = {3001},
+                        params = {ltp = "affine7_ltp", bp = "affine7_bp"}}
         },
         ["nerv.SigmoidLayer"] =
         {
-            sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+            sigmoid0 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid1 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid2 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid3 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid4 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid5 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid6 = {dim_in = {2048}, dim_out = {2048}}
         },
         ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
         {
-            ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+            ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true}
         },
         ["nerv.SoftmaxLayer"] = -- softmax for decode output
         {
-            softmax = {{}, {dim_in = {3001}, dim_out = {3001}}}
+            softmax = {dim_in = {3001}, dim_out = {3001}}
         }
     }, param_repo, gconf)
 
@@ -68,7 +67,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            global_transf = {{}, {
+            global_transf = {
                 dim_in = {429}, dim_out = {429},
                 sub_layers = layer_repo,
                 connections = {
@@ -78,8 +77,8 @@ function make_layer_repo(param_repo)
                     ["blayer2[1]"] = "wlayer2[1]",
                     ["wlayer2[1]"] = "<output>[1]"
                 }
-            }},
-            main = {{}, {
+            },
+            main = {
                 dim_in = {429}, dim_out = {3001},
                 sub_layers = layer_repo,
                 connections = {
@@ -100,7 +99,7 @@ function make_layer_repo(param_repo)
                     ["sigmoid6[1]"] = "affine7[1]",
                     ["affine7[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
@@ -108,7 +107,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            ce_output = {{}, {
+            ce_output = {
                 dim_in = {429, 1}, dim_out = {1},
                 sub_layers = layer_repo,
                 connections = {
@@ -117,8 +116,8 @@ function make_layer_repo(param_repo)
                     ["<input>[2]"] = "ce_crit[2]",
                     ["ce_crit[1]"] = "<output>[1]"
                 }
-            }},
-            softmax_output = {{}, {
+            },
+            softmax_output = {
                 dim_in = {429}, dim_out = {3001},
                 sub_layers = layer_repo,
                 connections = {
@@ -126,7 +125,7 @@ function make_layer_repo(param_repo)
                     ["main[1]"] = "softmax[1]",
                     ["softmax[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
diff --git a/nerv/examples/swb_baseline2.lua b/nerv/examples/swb_baseline2.lua
index b0b9689..8b5ebb1 100644
--- a/nerv/examples/swb_baseline2.lua
+++ b/nerv/examples/swb_baseline2.lua
@@ -7,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
         cv_scp = "/speechlab/users/mfy43/swb50/train_cv.scp",
         htk_conf = "/speechlab/users/mfy43/swb50/plp_0_d_a.conf",
         initialized_param = {"/speechlab/users/mfy43/swb50/swb_init.nerv",
-                            "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"},
-        debug = false}
+                            "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"}}
 
 function make_layer_repo(param_repo)
     local layer_repo = nerv.LayerRepo(
@@ -16,51 +15,51 @@ function make_layer_repo(param_repo)
         -- global transf
         ["nerv.BiasLayer"] =
         {
-            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
-            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+            blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}},
+            blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}}
         },
         ["nerv.WindowLayer"] =
         {
-            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
-            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+            wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}},
+            wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}}
         },
         -- biased linearity
         ["nerv.AffineLayer"] =
         {
-            affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
-            {dim_in = {429}, dim_out = {2048}}},
-            affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
-            {dim_in = {2048}, dim_out = {3001}}}
+            affine0 = {dim_in = {429}, dim_out = {2048},
+                        params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+            affine1 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+            affine2 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+            affine3 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+            affine4 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+            affine5 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+            affine6 = {dim_in = {2048}, dim_out = {2048},
+                        params = {ltp = "affine6_ltp", bp = "affine6_bp"}},
+            affine7 = {dim_in = {2048}, dim_out = {3001},
+                        params = {ltp = "affine7_ltp", bp = "affine7_bp"}}
         },
         ["nerv.SigmoidLayer"] =
         {
-            sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+            sigmoid0 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid1 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid2 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid3 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid4 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid5 = {dim_in = {2048}, dim_out = {2048}},
+            sigmoid6 = {dim_in = {2048}, dim_out = {2048}}
         },
         ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
         {
-            ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+            ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true}
         },
         ["nerv.SoftmaxLayer"] = -- softmax for decode output
         {
-            softmax = {{}, {dim_in = {3001}, dim_out = {3001}}}
+            softmax = {dim_in = {3001}, dim_out = {3001}}
         }
     }, param_repo, gconf)
 
@@ -68,7 +67,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            global_transf = {{}, {
+            global_transf = {
                 dim_in = {429}, dim_out = {429},
                 sub_layers = layer_repo,
                 connections = {
@@ -78,8 +77,8 @@ function make_layer_repo(param_repo)
                     ["blayer2[1]"] = "wlayer2[1]",
                     ["wlayer2[1]"] = "<output>[1]"
                 }
-            }},
-            main = {{}, {
+            },
+            main = {
                 dim_in = {429}, dim_out = {3001},
                 sub_layers = layer_repo,
                 connections = {
@@ -100,7 +99,7 @@ function make_layer_repo(param_repo)
                     ["sigmoid6[1]"] = "affine7[1]",
                     ["affine7[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
@@ -108,7 +107,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            ce_output = {{}, {
+            ce_output = {
                 dim_in = {429, 1}, dim_out = {1},
                 sub_layers = layer_repo,
                 connections = {
@@ -117,8 +116,8 @@ function make_layer_repo(param_repo)
                     ["<input>[2]"] = "ce_crit[2]",
                     ["ce_crit[1]"] = "<output>[1]"
                 }
-            }},
-            softmax_output = {{}, {
+            },
+            softmax_output = {
                 dim_in = {429}, dim_out = {3001},
                 sub_layers = layer_repo,
                 connections = {
@@ -126,7 +125,7 @@ function make_layer_repo(param_repo)
                     ["main[1]"] = "softmax[1]",
                     ["softmax[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua
deleted file mode 100644
index 71f04a3..0000000
--- a/nerv/examples/swb_baseline_basic.lua
+++ /dev/null
@@ -1,162 +0,0 @@
-require 'htk_io'
-gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
-        cumat_type = nerv.CuMatrixFloat,
-        mmat_type = nerv.MMatrixFloat,
-        frm_ext = 5,
-        frm_trim = 5,
-        tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
-        cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
-        htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
-        initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
-                "/slfs1/users/mfy43/swb_global_transf.nerv"},
-        debug = false}
-
-function make_layer_repo(param_repo)
-    local layer_repo = nerv.LayerRepo(
-    {
-        -- global transf
-        ["nerv.BiasLayer"] =
-        {
-            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
-            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
-        },
-        ["nerv.WindowLayer"] =
-        {
-            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
-            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
-        },
-        -- biased linearity
-        ["nerv.AffineLayer"] =
-        {
-            affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
-            {dim_in = {429}, dim_out = {2048}}},
-            affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
-            {dim_in = {2048}, dim_out = {2048}}},
-            affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
-            {dim_in = {2048}, dim_out = {3001}}}
-        },
-        ["nerv.SigmoidLayer"] =
-        {
-            sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
-            sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
-        },
-        ["nerv.SoftmaxCELayer"] =
-        {
-            ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
-        }
-    }, param_repo, gconf)
-
-    layer_repo:add_layers(
-    {
-        ["nerv.DAGLayer"] =
-        {
-            global_transf = {{}, {
-                dim_in = {429}, dim_out = {429},
-                sub_layers = layer_repo,
-                connections = {
-                    ["<input>[1]"] = "blayer1[1]",
-                    ["blayer1[1]"] = "wlayer1[1]",
-                    ["wlayer1[1]"] = "blayer2[1]",
-                    ["blayer2[1]"] = "wlayer2[1]",
-                    ["wlayer2[1]"] = "<output>[1]"
-                }
-            }},
-            main = {{}, {
-                dim_in = {429, 1}, dim_out = {1},
-                sub_layers = layer_repo,
-                connections = {
-                    ["<input>[1]"] = "affine0[1]",
-                    ["affine0[1]"] = "sigmoid0[1]",
-                    ["sigmoid0[1]"] = "affine1[1]",
-                    ["affine1[1]"] = "sigmoid1[1]",
-                    ["sigmoid1[1]"] = "affine2[1]",
-                    ["affine2[1]"] = "sigmoid2[1]",
-                    ["sigmoid2[1]"] = "affine3[1]",
-                    ["affine3[1]"] = "sigmoid3[1]",
-                    ["sigmoid3[1]"] = "affine4[1]",
-                    ["affine4[1]"] = "sigmoid4[1]",
-                    ["sigmoid4[1]"] = "affine5[1]",
-                    ["affine5[1]"] = "sigmoid5[1]",
-                    ["sigmoid5[1]"] = "affine6[1]",
-                    ["affine6[1]"] = "sigmoid6[1]",
-                    ["sigmoid6[1]"] = "affine7[1]",
-                    ["affine7[1]"] = "ce_crit[1]",
-                    ["<input>[2]"] = "ce_crit[2]",
-                    ["ce_crit[1]"] = "<output>[1]"
-                }
-            }}
-        }
-    }, param_repo, gconf)
-    return layer_repo
-end
-
-function get_network(layer_repo)
-    return layer_repo:get_layer("main")
-end
-
-function make_readers(scp_file, layer_repo)
-    return {
-                {reader = nerv.TNetReader(gconf,
-                    {
-                        id = "main_scp",
-                        scp_file = scp_file,
-                        conf_file = gconf.htk_conf,
-                        frm_ext = gconf.frm_ext,
-                        mlfs = {
-                            phone_state = {
-                                file = "/slfs1/users/mfy43/swb_ivec/ref.mlf",
-                                format = "map",
-                                format_arg = "/slfs1/users/mfy43/swb_ivec/dict",
-                                dir = "*/",
-                                ext = "lab"
-                            }
-                        }
-                    }),
-                data = {main_scp = 429, phone_state = 1}}
-            }
-end
-
-function make_buffer(readers)
-    return nerv.SGDBuffer(gconf,
-        {
-            buffer_size = gconf.buffer_size,
-            randomize = gconf.randomize,
-            readers = readers
-        })
-end
-
-function get_input_order()
-    return {{id = "main_scp", global_transf = true},
-            {id = "phone_state"}}
-end
-
-function get_accuracy(layer_repo)
-    local ce_crit = layer_repo:get_layer("ce_crit")
-    return ce_crit.total_correct / ce_crit.total_frames * 100
-end
-
-function print_stat(layer_repo)
-    local ce_crit = layer_repo:get_layer("ce_crit")
-    nerv.info("*** training stat begin ***")
-    nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
-    nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
-    nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
-    nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
-    nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
-    nerv.info("*** training stat end ***")
-end
diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua
index 103d89d..2d144b5 100644
--- a/nerv/examples/timit_baseline2.lua
+++ b/nerv/examples/timit_baseline2.lua
@@ -16,46 +16,46 @@ function make_layer_repo(param_repo)
         -- global transf
         ["nerv.BiasLayer"] =
         {
-            blayer1 = {{bias = "bias0"}, {dim_in = {440}, dim_out = {440}}}
+            blayer1 = {dim_in = {440}, dim_out = {440}, params = {bias = "bias0"}}
         },
         ["nerv.WindowLayer"] =
         {
-            wlayer1 = {{window = "window0"}, {dim_in = {440}, dim_out = {440}}}
+            wlayer1 = {dim_in = {440}, dim_out = {440}, params = {window = "window0"}}
         },
         -- biased linearity
         ["nerv.AffineLayer"] =
         {
-            affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
-            {dim_in = {440}, dim_out = {1024}}},
-            affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
-            {dim_in = {1024}, dim_out = {1024}}},
-            affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
-            {dim_in = {1024}, dim_out = {1024}}},
-            affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
-            {dim_in = {1024}, dim_out = {1024}}},
-            affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
-            {dim_in = {1024}, dim_out = {1024}}},
-            affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
-            {dim_in = {1024}, dim_out = {1024}}},
-            affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
-            {dim_in = {1024}, dim_out = {1959}}}
+            affine0 = {dim_in = {440}, dim_out = {1024},
+                        params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+            affine1 = {dim_in = {1024}, dim_out = {1024},
+                        params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+            affine2 = {dim_in = {1024}, dim_out = {1024},
+                        params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+            affine3 = {dim_in = {1024}, dim_out = {1024},
+                        params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+            affine4 = {dim_in = {1024}, dim_out = {1024},
+                        params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+            affine5 = {dim_in = {1024}, dim_out = {1024},
+                        params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+            affine6 = {dim_in = {1024}, dim_out = {1959},
+                        params = {ltp = "affine6_ltp", bp = "affine6_bp"}}
         },
         ["nerv.SigmoidLayer"] =
         {
-            sigmoid0 = {{}, {dim_in = {1024}, dim_out = {1024}}},
-            sigmoid1 = {{}, {dim_in = {1024}, dim_out = {1024}}},
-            sigmoid2 = {{}, {dim_in = {1024}, dim_out = {1024}}},
-            sigmoid3 = {{}, {dim_in = {1024}, dim_out = {1024}}},
-            sigmoid4 = {{}, {dim_in = {1024}, dim_out = {1024}}},
-            sigmoid5 = {{}, {dim_in = {1024}, dim_out = {1024}}}
+            sigmoid0 = {dim_in = {1024}, dim_out = {1024}},
+            sigmoid1 = {dim_in = {1024}, dim_out = {1024}},
+            sigmoid2 = {dim_in = {1024}, dim_out = {1024}},
+            sigmoid3 = {dim_in = {1024}, dim_out = {1024}},
+            sigmoid4 = {dim_in = {1024}, dim_out = {1024}},
+            sigmoid5 = {dim_in = {1024}, dim_out = {1024}}
         },
         ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
         {
-            ce_crit = {{}, {dim_in = {1959, 1}, dim_out = {1}, compressed = true}}
+            ce_crit = {dim_in = {1959, 1}, dim_out = {1}, compressed = true}
         },
         ["nerv.SoftmaxLayer"] = -- softmax for decode output
         {
-            softmax = {{}, {dim_in = {1959}, dim_out = {1959}}}
+            softmax = {dim_in = {1959}, dim_out = {1959}}
         }
     }, param_repo, gconf)
 
@@ -63,7 +63,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            global_transf = {{}, {
+            global_transf = {
                 dim_in = {440}, dim_out = {440},
                 sub_layers = layer_repo,
                 connections = {
@@ -71,8 +71,8 @@ function make_layer_repo(param_repo)
                     ["blayer1[1]"] = "wlayer1[1]",
                     ["wlayer1[1]"] = "<output>[1]"
                 }
-            }},
-            main = {{}, {
+            },
+            main = {
                 dim_in = {440}, dim_out = {1959},
                 sub_layers = layer_repo,
                 connections = {
@@ -91,7 +91,7 @@ function make_layer_repo(param_repo)
                     ["sigmoid5[1]"] = "affine6[1]",
                     ["affine6[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
@@ -99,7 +99,7 @@ function make_layer_repo(param_repo)
     {
         ["nerv.DAGLayer"] =
         {
-            ce_output = {{}, {
+            ce_output = {
                 dim_in = {440, 1}, dim_out = {1},
                 sub_layers = layer_repo,
                 connections = {
@@ -108,8 +108,8 @@ function make_layer_repo(param_repo)
                     ["<input>[2]"] = "ce_crit[2]",
                     ["ce_crit[1]"] = "<output>[1]"
                 }
-            }},
-            softmax_output = {{}, {
+            },
+            softmax_output = {
                 dim_in = {440}, dim_out = {1959},
                 sub_layers = layer_repo,
                 connections = {
@@ -117,7 +117,7 @@ function make_layer_repo(param_repo)
                     ["main[1]"] = "softmax[1]",
                     ["softmax[1]"] = "<output>[1]"
                 }
-            }}
+            }
         }
     }, param_repo, gconf)
 
diff --git a/nerv/init.lua b/nerv/init.lua
index 4d7b687..da7df29 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -98,24 +98,27 @@ function nerv.class(tname, parenttname)
 end
 
 function table.val_to_str(v)
-  if "string" == type(v) then
-    v = string.gsub(v, "\n", "\\n")
-    if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then
-      return "'" .. v .. "'"
+    if "string" == type(v) then
+        v = string.gsub(v, "\n", "\\n")
+        if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then
+            return "'" .. v .. "'"
+        end
+        return '"' .. string.gsub(v,'"', '\\"') .. '"'
+    else
+        return "table" == type(v) and table.tostring(v) or
+                    (("number" == type(v) or
+                    "string" == type(v) or
+                    "boolean" == type(v)) and tostring(v)) or
+                    nil -- failed to serialize
     end
-    return '"' .. string.gsub(v,'"', '\\"') .. '"'
-  else
-    return "table" == type(v) and table.tostring(v) or
-      tostring(v)
-  end
 end
 
 function table.key_to_str (k)
-  if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then
-    return k
-  else
-    return "[" .. table.val_to_str(k) .. "]"
-  end
+    if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then
+        return k
+    else
+        return "[" .. table.val_to_str(k) .. "]"
+    end
 end
 
 --- Get the string representation of a table, which can be executed as a valid
@@ -124,18 +127,18 @@ end
 -- @return the string representation which will result in a Lua table entity
 -- when evaluated
 function table.tostring(tbl)
-  local result, done = {}, {}
-  for k, v in ipairs(tbl) do
-    table.insert(result, table.val_to_str(v))
-    done[k] = true
-  end
-  for k, v in pairs(tbl) do
-    if not done[k] then
-      table.insert(result,
-        table.key_to_str(k) .. "=" .. table.val_to_str(v))
+    local result, done = {}, {}
+    for k, v in ipairs(tbl) do
+        table.insert(result, table.val_to_str(v))
+        done[k] = true
     end
-  end
-  return "{" .. table.concat(result, ",") .. "}"
+    for k, v in pairs(tbl) do
+        if not done[k] then
+            table.insert(result,
+            table.key_to_str(k) .. "=" .. table.val_to_str(v))
+        end
+    end
+    return "{" .. table.concat(result, ",") .. "}"
 end
 
 --- Get the class by name.
@@ -332,27 +335,17 @@ function nerv.print_usage(options)
                     (opt_full and '--' .. opt_full) or "",
                     (opt_short and '-' .. opt_short) or "",
                     opt_type,
-                    v.default or "",
+                    (v.default ~= nil and tostring(v.default)) or "",
                     v.desc or "")
     end
     nerv.printf("\n")
 end
 
--- function nerv.copy_file(fname1, fname2)
---     local fin, fout, err
---     fin, err = io.open(fname1, "r")
---     if fin then
---         fout, err = io.open(fname2, "w")
---     end
---     if not (fin and fout) then
---         nerv.error("[copy] from %s to %s: %s", fname1, fname2, err)
---     end
---     while true do
---         local b = fin:read(1024)
---         if b == nil then break end
---         fout:write(b)
---     end
--- end
+function table.extend(tbl1, tbl2)
+    for _, v in ipairs(tbl2) do
+        table.insert(tbl1, v)
+    end
+end
 
 -- the following lines trigger the initialization of basic modules
 
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
 --- A parameter that consists of a single matrix
 -- @type nerv.MatrixParam
 
+function MatrixParam:check(checker)
+    -- check trans matrix type
+    checker(self.trans)
+end
+
 --- Read from a file handle.
 -- @param handle the file handle
 function MatrixParam:read(handle)
     self.trans = self.gconf.mmat_type.load(handle)
-    if not self.gconf.use_cpu then
-        self.trans = self.gconf.cumat_type.new_from_host(self.trans)
-    end
 end
 
 function MatrixParam:write(handle)
-    local trans = self.trans
-    if not self.gconf.use_cpu then
-        trans = self.trans:new_to_host()
-    end
-    trans:save(handle)
+    self.trans:save(handle)
 end
 
 function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
     self.correction:fill(0)
 end
 
+function MatrixParam:copy(copier)
+    local target = nerv.MatrixParam(self.id, self.gconf)
+    target.trans = copier(self.trans)
+    return target
+end
+
 function MatrixParam:_update_by_gradient(gradient, alpha, beta)
     local gconf = self.gconf
     -- momentum gain
@@ -77,25 +81,24 @@ end
 
 --- The constructor.
 function AffineLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
-        layer_conf.ltp1 = layer_conf.ltp
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+    self:bind_params()
+end
+
+function AffineLayer:bind_params()
     for i = 1, #self.dim_in do
         local pid = "ltp" .. i
         local pid_list = i == 1 and {pid, "ltp"} or pid
-        self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
                                             nerv.LinearTransParam,
-                                            {self.dim_in[i], self.dim_out[1]}) 
+                                            {self.dim_in[i], self.dim_out[1]})
     end
     self.ltp = self.ltp1 -- alias of ltp1
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                                 nerv.BiasParam,
                                 {1, self.dim_out[1]})
-    self.gconf = global_conf
-    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
 end
 
 function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function AffineLayer:get_params()
-    local pr = nerv.ParamRepo({self.ltp1, self.bp})
+    local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
     for i = 2, #self.dim_in do
         pr:add(self["ltp" .. i].id, self["ltp" .. i])
     end
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
 local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
 
 function BiasLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.bias = layer_conf.bias
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function BiasLayer:bind_params()
+    self.bias = self:find_param("bias", self.lconf, self.gconf,
+                                nerv.BiasParam,
+                                {1, self.dim_out[1]})
 end
 
 function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
 end
 
 function BiasLayer:get_params()
-    return nerv.ParamRepo({self.bias})
+    return nerv.ParamRepo({self.bias}, self.loc_type)
 end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
 local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
 
 function CombinerLayer:__init(id, global_conf, layer_conf)
-    self.id = id
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.lambda = layer_conf.lambda
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
     self:check_dim_len(#self.lambda, -1)
     if #self.dim_in < 1 then
         nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
     end
 end
 
+function CombinerLayer:bind_params()
+    -- do nothing
+end
+
 function CombinerLayer:init(batch_size)
     local dim = self.dim_in[1]
     for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function CombinerLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..1a379c9 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,18 @@
 local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
 
 function DropoutLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
     if self.rate == nil then
         nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
     end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function DropoutLayer:bind_params()
+    -- do nothing
+end
+
 function DropoutLayer:init(batch_size, chunk_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function DropoutLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
 local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
 
 function ElemMulLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- element-wise multiplication of input[1] and input[2]
     self:check_dim_len(2, 1)
 end
 
+function ElemMulLayer:bind_params()
+    -- do nothing
+end
+
 function ElemMulLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] or
         self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
 end
 
 function ElemMulLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..a590a67 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c (h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     if self.dim_in[2] ~= self.dim_out[1] then
         nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                     self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo(nil, self.loc_type)
     end
     
     local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
         },
     }
     
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
                                 {dim_in = self.dim_in,
                                 dim_out = self.dim_out,
-                                sub_layers = layerRepo,
+                                sub_layers = self.lrepo,
                                 connections = connections})
     
     self:check_dim_len(2, 1) -- x, h and h
 end
 
+function GRULayer:bind_params() -- hand the configured param repo to the internal layer repo
+    local pr = self.lconf.pr -- `layer_conf` is only a parameter of __init; use the copy stored on self
+    if pr == nil then
+        pr = nerv.ParamRepo(nil, self.loc_type) -- no repo configured: construct a new one for this location type
+    end
+    self.lrepo:rebind(pr)
+end
+
 function GRULayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 54f33ae..146ad8c 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
 local Layer = nerv.class('nerv.Layer')
 
 function Layer:__init(id, global_conf, layer_conf)
-    nerv.error_method_not_implemented()
+    self.id = id
+    self.gconf = global_conf
+    self.lconf = layer_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+    else
+        self.mat_type = self.gconf.cumat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
 end
 
 function Layer:init(batch_size)
@@ -66,34 +77,41 @@ function Layer:get_params()
     nerv.error_method_not_implemented()
 end
 
+function Layer:bind_params()
+    nerv.error_method_not_implemented()
+end
+
 function Layer:get_dim()
     return self.dim_in, self.dim_out
 end
 
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
-    if type(pid_list) == "string" then
-        pid_list = {pid_list}
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+    if type(plist) == "string" then
+        plist = {plist}
     end
-    pid_list_str = table.tostring(pid_list)
-    for i, pid in ipairs(pid_list) do
-        if lconf[pid] ~= nil then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
-            return lconf[pid]
+    if lconf.params == nil then
+        lconf.params = {}
+    end
+    plist_str = table.tostring(plist)
+    local pid
+    for i, pname in ipairs(plist) do
+        if lconf.params[pname] ~= nil then
+            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+            pid = lconf.params[pname]
         end
-        local pid_g = self.id .. '_' .. pid --global identifier
-        local pr = lconf.pr
-        local p
-        if pr ~= nil and pr:has_param(pid_g) == true then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
-            p = pr:get_param(pid_g)
-            return p
+        if lconf.pr:has_param(pid) then
+            return lconf.pr:get_param(pid)
         end
     end
-    nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
-                "switch to auto-generate", pid_list_str, self.id)
-    local pid_g = self.id .. '_' .. pid_list[1]
-    p = p_type(pid_g, gconf)
-    p.trans = gconf.cumat_type(unpack(p_dim))
+    pid = self.id .. '_' .. plist[1]
+    if lconf.pr:has_param(pid) then
+        nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+        return lconf.pr:get_param(pid)
+    end
+    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+                "switch to auto-generate", plist_str, self.id)
+    local p = p_type(pid, gconf)
+    p.trans = self.mat_type(unpack(p_dim))
     if type(gconf.param_random) ~= "function" then
         nerv.error("a param generate function is needed")
     end
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index 500bd87..d4c9212 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo(nil, self.loc_type)
     end
     
     local function ap(str)
@@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
         },
     }
     
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
                                 {dim_in = self.dim_in,
                                  dim_out = self.dim_out,
-                                 sub_layers = layerRepo,
+                                 sub_layers = self.lrepo,
                                  connections = connections})
     
     self:check_dim_len(3, 2) -- x, h, c and h, c
 end
 
+function LSTMLayer:bind_params() -- hand the configured param repo to the internal layer repo
+    local pr = self.lconf.pr -- `layer_conf` is only a parameter of __init; use the copy stored on self
+    if pr == nil then
+        pr = nerv.ParamRepo(nil, self.loc_type) -- no repo configured: construct a new one for this location type
+    end
+    self.lrepo:rebind(pr)
+end
+
 function LSTMLayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 1963eba..7a27bab 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
 -- NOTE: this is a full matrix gate
 
 function LSTMGateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) --accept multiple inputs
+    self:bind_params()
+end
 
+function LSTMGateLayer:bind_params()
     for i = 1, #self.dim_in do
-        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
                                             nerv.LinearTransParam,
                                             {self.dim_in[i], self.dim_out[1]})
     end
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                                 nerv.BiasParam, {1, self.dim_out[1]})
-  
-    self:check_dim_len(-1, 1) --accept multiple inputs
 end
 
 function LSTMGateLayer:init(batch_size)
@@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output)
 end
 
 function LSTMGateLayer:get_params()
-    local pr = nerv.ParamRepo({self.bp})
+    local pr = nerv.ParamRepo({self.bp}, self.loc_type)
     for i = 1, #self.dim_in do
         pr:add(self["ltp" .. i].id, self["ltp" .. i])
     end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
 local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
 
 function MSELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(2, -1)
 end
 
+function MSELayer:bind_params()
+    -- do nothing
+end
+
 function MSELayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function MSELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..a9f9749 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,13 +1,14 @@
 local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
 
 function SigmoidLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function SigmoidLayer:bind_params()
+    -- do nothing
+end
+
 function SigmoidLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SigmoidLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
 local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
 
 function SoftmaxLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function SoftmaxLayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SoftmaxLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
 local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
 
 function SoftmaxCELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.compressed = layer_conf.compressed
     if self.compressed == nil then
         self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
     self:check_dim_len(2, -1) -- two inputs: nn output and label
 end
 
+function SoftmaxCELayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxCELayer:init(batch_size, chunk_size)
     if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function SoftmaxCELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
 local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
 
 function TanhLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function TanhLayer:bind_params()
+    -- do nothing
+end
+
 function TanhLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function TanhLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 4933de0..364929f 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -1,12 +1,15 @@
 local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer")
 
 function WindowLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.window = layer_conf.window
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function WindowLayer:bind_params()
+    self.window = self:find_param("window", self.lconf, self.gconf,
+                                nerv.BiasParam,
+                                {1, self.dim_out[1]})
 end
 
 function WindowLayer:init()
@@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output)
 end
 
 function WindowLayer:get_params()
-    return nerv.ParamRepo({self.window})
+    return nerv.ParamRepo({self.window}, self.loc_type)
 end
diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua
index ef2fb6b..cf85004 100644
--- a/nerv/matrix/init.lua
+++ b/nerv/matrix/init.lua
@@ -87,6 +87,17 @@ function nerv.Matrix:__mul__(b)
     return c
 end
 
+--- A wrapper function for `copy_from`
+function nerv.Matrix:copy_to(b, ...)
+    b:copy_from(self, ...)
+end
+
+--- The base class for all device (in-GPU) matrices
+-- @type nerv.CuMatrix
+
+--- A wrapper function for `copy_fromd`
+nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to
+
 --- CUDA float matrices
 -- @type nerv.CuMatrixFloat
 
@@ -127,6 +138,14 @@ end
 -- @type nerv.MMatrix
 
 --- A wrapper function for `copy_fromh`
-function nerv.MMatrix:copy_toh(b, ...)
+nerv.MMatrix.copy_toh = nerv.Matrix.copy_to
+
+--- A wrapper function for `nerv.CuMatrix` copy
+function nerv.MMatrix:copy_fromd(b, ...)
+    b:copy_toh(self, ...)
+end
+
+--- A wrapper function for `nerv.CuMatrix` copy
+function nerv.MMatrix:copy_tod(b, ...)
     b:copy_fromh(self, ...)
 end
diff --git a/nerv/nerv b/nerv/nerv
index f73d517..4c20ec7 100644
--- a/nerv/nerv
+++ b/nerv/nerv
@@ -3,6 +3,7 @@ require 'nerv'
 local options = {{"help", "h", "boolean", default = false, desc = "print this help message"},
                  {"use-cpu", "c", "boolean", default = false, desc = "use CPU by default (instead of gpu by default)"},
                  {"select-gpu", nil, "int", default = -1, desc = "select the GPU for computation, fallback to auto mode if not specified"}}
+econf = {} -- environment configuration
 
 local function print_help()
     nerv.printf("Usage: <nerv_prog> [options] script.lua\n")
@@ -31,6 +32,9 @@ if not opts["use-cpu"].val then
     _add_profile_method(nerv.CuMatrix)
     nerv.CuMatrix.select_gpu =
             function (dev) nerv.CuMatrix.