make `bind_params` and `init` consistent in `affine.lua`, `lstm_gate.lua`, `projection.lua`
author: Determinant <ted.sybil@gmail.com> 2016-05-05 22:09:17 +0800
committer: Determinant <ted.sybil@gmail.com> 2016-05-05 22:09:17 +0800
commit: 1c7e2c3da330e91c504a8d210290305f4a553af7 (patch)
tree: 3f5eb78204145dd4558fda34916bd0bd5e50431f
parent: 40da326da485cae4e0a72a96db1fb0a3c90757e4 (diff)
4 files changed, 26 insertions, 17 deletions
diff --git a/nerv/doc/source/overview.rst b/nerv/doc/source/overview.rst
index 8ff1dd5..5c75bd8 100644
--- a/nerv/doc/source/overview.rst
+++ b/nerv/doc/source/overview.rst
@@ -6,7 +6,7 @@ What is NERV?
 
 NERV is a general-purpose deep learning toolkit designed to be be simple,
 lightweight, extensible and complete. The name "NERV" comes from the German
-word "nerv" for "nerve" in English. It is also a fictional organization in the
+word "nerv" for "nerve" in English. It is also a fictional organization in
 *Neon Genesis Evangelion*, a famous anime.
 
 
@@ -18,7 +18,8 @@ and related deep learning techniques have given rise to many tools and toolkits
 which are designed for constructing and training neural networks which could
 facilitate and routinize the research cycle of deep learning applied to areas
 such as speech processing, computer vision, natural language processing and so
-on. Such tools or toolkits can be categorized into two by design: task-specific or general-purpose. 
+on. Such tools or toolkits can be categorized into two by design: task-specific
+or general-purpose.
 
 The first category tries to address the deep learning in a direct way. These
 tools usually have a very specific goal, which means to support a certain type
@@ -245,7 +246,7 @@ Network
 *******
 
 Layers alone only declare the structure and behavior of the intended network.
-To turn a layer into a trainable model, we need to *compile* the layer into an
+To turn a layer into a trainable model, we need to *compile* the layer into a
 *network* instance (``nerv.Network``). What compilation does is to flatten out
 the nested graph layer according to the specified structure and allocate the
 space for those intermediate activations between the layers. Although the
@@ -285,7 +286,7 @@ samples together and cut samples into mini-batches.
 Scheduler (Trainer)
 *******************
 
-*Schedulers* refer to those top-level scripts that implements the main training
+*Schedulers* refer to those top-level scripts that implement the main training
 loop and ticks the training process. A general-purpose scheduler typically
 takes in a Lua script written by an end user that contains description of the
 network, task-specific processing for reading data and some hyper-parameters.
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 3bf5a11..ace4a78 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -108,7 +108,7 @@ function AffineLayer:bind_params()
         self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
                                             nerv.LinearTransParam,
                                             {self.dim_in[i], self.dim_out[1]})
-        local no_update = lconf["no_update_ltp"..i]
+        local no_update = lconf["no_update_ltp" .. i]
         if (no_update ~= nil) and no_update or lconf.no_update_all then
             self["ltp" .. i].no_update = true
         end
@@ -125,7 +125,7 @@ function AffineLayer:bind_params()
 end
 
 function AffineLayer:init(batch_size)
-    if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
+    if self.dim_out[1] ~= self.bp.trans:ncol() then
         nerv.error("mismatching dimensions of linear transform and bias paramter")
     end
     for i = 1, #self.dim_in do
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 99bf3ca..82824fa 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -12,17 +12,20 @@ function LSTMGateLayer:bind_params()
     local lconf = self.lconf
     lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
     for i = 1, #self.dim_in do
-        self["ltp" .. i] = self:find_param("ltp" .. i, lconf, self.gconf,
+        local pid = "ltp" .. i
+        local pid_list = i == 1 and {pid, "ltp"} or pid
+        self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
                                             nerv.LinearTransParam,
                                             {self.dim_in[i], self.dim_out[1]})
         if self.param_type[i] == 'D' then
             self["ltp" .. i].trans:diagonalize()
         end
-        local no_update = lconf["no_update_ltp"..i]
+        local no_update = lconf["no_update_ltp" .. i]
         if (no_update ~= nil) and no_update or lconf.no_update_all then
             self["ltp" .. i].no_update = true
         end
     end
+    self.ltp = self.ltp1 -- alias of ltp1
     self.bp = self:find_param("bp", lconf, self.gconf,
                                 nerv.BiasParam, {1, self.dim_out[1]},
                                 nerv.Param.gen_zero)
@@ -33,18 +36,17 @@ function LSTMGateLayer:bind_params()
 end
 
 function LSTMGateLayer:init(batch_size)
+    if self.dim_out[1] ~= self.bp.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform and bias paramter")
+    end
     for i = 1, #self.dim_in do
-        if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then
-            nerv.error("mismatching dimensions of linear transform and bias paramter")
-        end
         if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
             nerv.error("mismatching dimensions of linear transform parameter and input")
         end
-        self["ltp"..i]:train_init()
-    end
-    
-    if self.dim_out[1] ~= self.ltp1.trans:ncol() then
-        nerv.error("mismatching dimensions of linear transform parameter and output")
+        if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then
+            nerv.error("mismatching dimensions of linear transform parameter and output")
+        end
+        self["ltp" .. i]:train_init()
     end
     self.bp:train_init()
     self.err_bakm = self.mat_type(batch_size, self.dim_out[1])
diff --git a/nerv/layer/projection.lua b/nerv/layer/projection.lua
index d99401c..9d376bd 100644
--- a/nerv/layer/projection.lua
+++ b/nerv/layer/projection.lua
@@ -8,12 +8,18 @@ function ProjectionLayer:__init(id, global_conf, layer_conf)
 end
 
 function ProjectionLayer:bind_params()
+    local lconf = self.lconf
+    lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
     for i = 1, #self.dim_in do
         local pid = "ltp" .. i
         local pid_list = i == 1 and {pid, "ltp"} or pid
-        self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
+        self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
                                             nerv.LinearTransParam,
                                             {self.dim_in[i], self.dim_out[1]})
+        local no_update = lconf["no_update_ltp" .. i]
+        if (no_update ~= nil) and no_update or lconf.no_update_all then
+            self["ltp" .. i].no_update = true
+        end
     end
     self.ltp = self.ltp1 -- alias of ltp1
 end
author	Determinant <ted.sybil@gmail.com>	2016-05-05 22:09:17 +0800
committer	Determinant <ted.sybil@gmail.com>	2016-05-05 22:09:17 +0800
commit	1c7e2c3da330e91c504a8d210290305f4a553af7 (patch)
tree	3f5eb78204145dd4558fda34916bd0bd5e50431f
parent	40da326da485cae4e0a72a96db1fb0a3c90757e4 (diff)