Diffstat (limited to 'nerv/layer')
-rw-r--r-- | nerv/layer/affine.lua     | 43
-rw-r--r-- | nerv/layer/bias.lua       | 15
-rw-r--r-- | nerv/layer/combiner.lua   | 16
-rw-r--r-- | nerv/layer/dropout.lua    | 16
-rw-r--r-- | nerv/layer/duplicate.lua  |  7
-rw-r--r-- | nerv/layer/elem_mul.lua   | 11
-rw-r--r-- | nerv/layer/graph.lua      |  7
-rw-r--r-- | nerv/layer/gru.lua        | 20
-rw-r--r-- | nerv/layer/identity.lua   |  7
-rw-r--r-- | nerv/layer/init.lua       | 60
-rw-r--r-- | nerv/layer/lstm.lua       | 20
-rw-r--r-- | nerv/layer/lstm_gate.lua  | 17
-rw-r--r-- | nerv/layer/mse.lua        | 16
-rw-r--r-- | nerv/layer/rnn.lua        |  7
-rw-r--r-- | nerv/layer/sigmoid.lua    | 11
-rw-r--r-- | nerv/layer/softmax.lua    | 11
-rw-r--r-- | nerv/layer/softmax_ce.lua | 16
-rw-r--r-- | nerv/layer/tanh.lua       | 11
-rw-r--r-- | nerv/layer/window.lua     | 15
19 files changed, 168 insertions, 158 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
 --- A parameter that consists of a single matrix
 -- @type nerv.MatrixParam
 
+function MatrixParam:check(checker)
+    -- check trans matrix type
+    checker(self.trans)
+end
+
 --- Read from a file handle.
 -- @param handle the file handle
 function MatrixParam:read(handle)
     self.trans = self.gconf.mmat_type.load(handle)
-    if not self.gconf.use_cpu then
-        self.trans = self.gconf.cumat_type.new_from_host(self.trans)
-    end
 end
 
 function MatrixParam:write(handle)
-    local trans = self.trans
-    if not self.gconf.use_cpu then
-        trans = self.trans:new_to_host()
-    end
-    trans:save(handle)
+    self.trans:save(handle)
 end
 
 function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
     self.correction:fill(0)
 end
 
+function MatrixParam:copy(copier)
+    local target = nerv.MatrixParam(self.id, self.gconf)
+    target.trans = copier(self.trans)
+    return target
+end
+
 function MatrixParam:_update_by_gradient(gradient, alpha, beta)
     local gconf = self.gconf
     -- momentum gain
@@ -77,25 +81,24 @@ end
 
 --- The constructor.
 function AffineLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
-        layer_conf.ltp1 = layer_conf.ltp
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+    self:bind_params()
+end
+
+function AffineLayer:bind_params()
     for i = 1, #self.dim_in do
         local pid = "ltp" .. i
         local pid_list = i == 1 and {pid, "ltp"} or pid
-        self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
                                            nerv.LinearTransParam,
-                                            {self.dim_in[i], self.dim_out[1]})
+                                           {self.dim_in[i], self.dim_out[1]})
     end
     self.ltp = self.ltp1 -- alias of ltp1
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                               nerv.BiasParam, {1, self.dim_out[1]})
-    self.gconf = global_conf
-    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
 end
 
 function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function AffineLayer:get_params()
-    local pr = nerv.ParamRepo({self.ltp1, self.bp})
+    local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
     for i = 2, #self.dim_in do
         pr:add(self["ltp" .. i])
     end
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
 local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
 
 function BiasLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.bias = layer_conf.bias
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function BiasLayer:bind_params()
+    self.bias = self:find_param("bias", self.lconf, self.gconf,
+                                nerv.BiasParam,
+                                {1, self.dim_out[1]})
 end
 
 function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
 end
 
 function BiasLayer:get_params()
-    return nerv.ParamRepo({self.bias})
+    return nerv.ParamRepo({self.bias}, self.loc_type)
 end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
 local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
 
 function CombinerLayer:__init(id, global_conf, layer_conf)
-    self.id = id
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.lambda = layer_conf.lambda
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
     self:check_dim_len(#self.lambda, -1)
     if #self.dim_in < 1 then
         nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
     end
 end
 
+function CombinerLayer:bind_params()
+    -- do nothing
+end
+
 function CombinerLayer:init(batch_size)
     local dim = self.dim_in[1]
     for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function CombinerLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..1a379c9 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,18 @@
 local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
 
 function DropoutLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
     if self.rate == nil then
         nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
     end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function DropoutLayer:bind_params()
+    -- do nothing
+end
+
 function DropoutLayer:init(batch_size, chunk_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function DropoutLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
index 8988617..137472b 100644
--- a/nerv/layer/duplicate.lua
+++ b/nerv/layer/duplicate.lua
@@ -1,10 +1,7 @@
 local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
 
 function DuplicateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, -1)
     if #self.dim_out < 1 then
         nerv.error('no output specified')
@@ -40,5 +37,5 @@ function DuplicateLayer:update()
 end
 
 function DuplicateLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
 local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
 
 function ElemMulLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- element-wise multiplication of input[1] and input[2]
     self:check_dim_len(2, 1)
 end
 
+function ElemMulLayer:bind_params()
+    -- do nothing
+end
+
 function ElemMulLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] or
         self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
 end
 
 function ElemMulLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 1406eff..5f42fca 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -1,10 +1,7 @@
 local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
 
 function GraphLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:graph_init(layer_conf.layer_repo, layer_conf.connections)
 end
 
@@ -155,5 +152,5 @@ function GraphLayer:get_params()
             table.insert(param_repos, ref.layer:get_params())
         end
     end
-    return nerv.ParamRepo.merge(param_repos)
+    return nerv.ParamRepo.merge(param_repos, self.loc_type)
 end
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..71718d7 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c (h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     if self.dim_in[2] ~= self.dim_out[1] then
         nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                     self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
             {dim_in = self.dim_in,
             dim_out = self.dim_out,
-            sub_layers = layerRepo,
+            sub_layers = self.lrepo,
             connections = connections})
 
     self:check_dim_len(2, 1) -- x, h and h
 end
 
+function GRULayer:bind_params()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+    self.lrepo:rebind(pr)
+end
+
 function GRULayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua
index aeeff89..d56337d 100644
--- a/nerv/layer/identity.lua
+++ b/nerv/layer/identity.lua
@@ -1,10 +1,7 @@
 local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer')
 
 function IdentityLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error('mismatching dimensions of input and output')
@@ -29,5 +26,5 @@ function IdentityLayer:update()
 end
 
 function IdentityLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 4fabefa..475ef62 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
 local Layer = nerv.class('nerv.Layer')
 
 function Layer:__init(id, global_conf, layer_conf)
-    nerv.error_method_not_implemented()
+    self.id = id
+    self.gconf = global_conf
+    self.lconf = layer_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+    else
+        self.mat_type = self.gconf.cumat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
 end
 
 function Layer:init(batch_size)
@@ -66,6 +77,10 @@ function Layer:get_params()
     nerv.error_method_not_implemented()
 end
 
+function Layer:bind_params()
+    nerv.error_method_not_implemented()
+end
+
 function Layer:get_dim()
     return self.dim_in, self.dim_out
 end
@@ -78,30 +93,33 @@ function Layer:get_sublayer(id)
     nerv.error('primitive layer does not have sublayers')
 end
 
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
-    if type(pid_list) == "string" then
-        pid_list = {pid_list}
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+    if type(plist) == "string" then
+        plist = {plist}
     end
-    pid_list_str = table.tostring(pid_list)
-    for i, pid in ipairs(pid_list) do
-        if lconf[pid] ~= nil then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
-            return lconf[pid]
+    if lconf.params == nil then
+        lconf.params = {}
+    end
+    plist_str = table.tostring(plist)
+    local pid
+    for i, pname in ipairs(plist) do
+        if lconf.params[pname] ~= nil then
+            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+            pid = lconf.params[pname]
         end
-        local pid_g = self.id .. '_' .. pid --global identifier
-        local pr = lconf.pr
-        local p
-        if pr ~= nil and pr:has_param(pid_g) == true then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
-            p = pr:get_param(pid_g)
-            return p
+        if lconf.pr:has_param(pid) then
+            return lconf.pr:get_param(pid)
         end
     end
-    nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
-                "switch to auto-generate", pid_list_str, self.id)
-    local pid_g = self.id .. '_' .. pid_list[1]
-    p = p_type(pid_g, gconf)
-    p.trans = gconf.cumat_type(unpack(p_dim))
+    pid = self.id .. '_' .. plist[1]
+    if lconf.pr:has_param(pid) then
+        nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+        return lconf.pr:get_param(pid)
+    end
+    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+                "switch to auto-generate", plist_str, self.id)
+    local p = p_type(pid, gconf)
+    p.trans = self.mat_type(unpack(p_dim))
     if type(gconf.param_random) ~= "function" then
         nerv.error("a param generate function is needed")
     end
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index caa7569..641d5dc 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local function ap(str)
@@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
             {dim_in = self.dim_in,
             dim_out = self.dim_out,
-            sub_layers = layerRepo,
+            sub_layers = self.lrepo,
             connections = connections})
 
     self:check_dim_len(3, 2) -- x, h, c and h, c
 end
 
+function LSTMLayer:bind_params()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+    self.lrepo:rebind(pr)
+end
+
 function LSTMLayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 1963eba..7a27bab 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
 -- NOTE: this is a full matrix gate
 
 function LSTMGateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) --accept multiple inputs
+    self:bind_params()
+end
 
+function LSTMGateLayer:bind_params()
     for i = 1, #self.dim_in do
-        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
                                            nerv.LinearTransParam,
                                            {self.dim_in[i], self.dim_out[1]})
     end
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                               nerv.BiasParam, {1, self.dim_out[1]})
-
-    self:check_dim_len(-1, 1) --accept multiple inputs
 end
 
 function LSTMGateLayer:init(batch_size)
@@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output)
 end
 
 function LSTMGateLayer:get_params()
-    local pr = nerv.ParamRepo({self.bp})
+    local pr = nerv.ParamRepo({self.bp}, self.loc_type)
     for i = 1, #self.dim_in do
         pr:add(self["ltp" .. i])
     end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
 local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
 
 function MSELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(2, -1)
 end
 
+function MSELayer:bind_params()
+    -- do nothing
+end
+
 function MSELayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function MSELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 38f2326..e59cf5b 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -1,10 +1,7 @@
 local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
 
 function RNNLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = layer_conf.gconf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 
     local din = layer_conf.dim_in[1]
@@ -12,7 +9,7 @@ function RNNLayer:__init(id, global_conf, layer_conf)
 
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local layers = {
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..a9f9749 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,13 +1,14 @@
 local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
 
 function SigmoidLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function SigmoidLayer:bind_params()
+    -- do nothing
+end
+
 function SigmoidLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SigmoidLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
 local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
 
 function SoftmaxLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function SoftmaxLayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SoftmaxLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
 local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
 
 function SoftmaxCELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.compressed = layer_conf.compressed
     if self.compressed == nil then
         self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
     self:check_dim_len(2, -1) -- two inputs: nn output and label
 end
 
+function SoftmaxCELayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxCELayer:init(batch_size, chunk_size)
     if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function SoftmaxCELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
 local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
 
 function TanhLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function TanhLayer:bind_params()
+    -- do nothing
+end
+
 function TanhLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function TanhLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 4933de0..364929f 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -1,12 +1,15 @@
 local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer")
 
 function WindowLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.window = layer_conf.window
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function WindowLayer:bind_params()
+    self.window = self:find_param("window", self.lconf, self.gconf,
+                                  nerv.BiasParam,
+                                  {1, self.dim_out[1]})
 end
 
 function WindowLayer:init()
@@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output)
 end
 
 function WindowLayer:get_params()
-    return nerv.ParamRepo({self.window})
+    return nerv.ParamRepo({self.window}, self.loc_type)
 end
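The hunks above move the per-layer constructor boilerplate (id, gconf, lconf, dim_in, dim_out, mat_type, loc_type) into nerv.Layer.__init and split parameter lookup into a separate bind_params method, so that a layer's parameters can later be rebound to a different ParamRepo without reconstructing the layer. A minimal sketch of a layer written against the refactored API follows; the ScaleLayer class and its single "scale" parameter are hypothetical and used only for illustration, while the base-class calls are the ones introduced in this change.

local ScaleLayer = nerv.class("nerv.ScaleLayer", "nerv.Layer")

function ScaleLayer:__init(id, global_conf, layer_conf)
    -- the base __init now fills in id, gconf, lconf, dim_in, dim_out,
    -- mat_type and loc_type (ON_HOST or ON_DEVICE)
    nerv.Layer.__init(self, id, global_conf, layer_conf)
    self:check_dim_len(1, 1)
    self:bind_params()
end

function ScaleLayer:bind_params()
    -- find_param resolves the parameter through layer_conf.params and
    -- layer_conf.pr, falling back to auto-generation via gconf.param_random
    self.scale = self:find_param("scale", self.lconf, self.gconf,
                                 nerv.BiasParam, {1, self.dim_out[1]})
end

function ScaleLayer:get_params()
    -- the repo now records whether its params live on host or device
    return nerv.ParamRepo({self.scale}, self.loc_type)
end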