From 1c7e2c3da330e91c504a8d210290305f4a553af7 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 5 May 2016 22:09:17 +0800 Subject: make `bind_params` and `init` consistent in `affine.lua`, `lstm_gate.lua`, `projection.lua` --- nerv/doc/source/overview.rst | 9 +++++---- nerv/layer/affine.lua | 4 ++-- nerv/layer/lstm_gate.lua | 22 ++++++++++++---------- nerv/layer/projection.lua | 8 +++++++- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/nerv/doc/source/overview.rst b/nerv/doc/source/overview.rst index 8ff1dd5..5c75bd8 100644 --- a/nerv/doc/source/overview.rst +++ b/nerv/doc/source/overview.rst @@ -6,7 +6,7 @@ What is NERV? NERV is a general-purpose deep learning toolkit designed to be be simple, lightweight, extensible and complete. The name "NERV" comes from the German -word "nerv" for "nerve" in English. It is also a fictional organization in the +word "nerv" for "nerve" in English. It is also a fictional organization in *Neon Genesis Evangelion*, a famous anime. @@ -18,7 +18,8 @@ and related deep learning techniques have given rise to many tools and toolkits which are designed for constructing and training neural networks which could facilitate and routinize the research cycle of deep learning applied to areas such as speech processing, computer vision, natural language processing and so -on. Such tools or toolkits can be categorized into two by design: task-specific or general-purpose. +on. Such tools or toolkits can be categorized into two by design: task-specific +or general-purpose. The first category tries to address the deep learning in a direct way. These tools usually have a very specific goal, which means to support a certain type @@ -245,7 +246,7 @@ Network ******* Layers alone only declare the structure and behavior of the intended network. -To turn a layer into a trainable model, we need to *compile* the layer into an +To turn a layer into a trainable model, we need to *compile* the layer into a *network* instance (``nerv.Network``). What compilation does is to flatten out the nested graph layer according to the specified structure and allocate the space for those intermediate activations between the layers. Although the @@ -285,7 +286,7 @@ samples together and cut samples into mini-batches. Scheduler (Trainer) ******************* -*Schedulers* refer to those top-level scripts that implements the main training +*Schedulers* refer to those top-level scripts that implement the main training loop and ticks the training process. A general-purpose scheduler typically takes in a Lua script written by an end user that contains description of the network, task-specific processing for reading data and some hyper-parameters. diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 3bf5a11..ace4a78 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -108,7 +108,7 @@ function AffineLayer:bind_params() self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) - local no_update = lconf["no_update_ltp"..i] + local no_update = lconf["no_update_ltp" .. i] if (no_update ~= nil) and no_update or lconf.no_update_all then self["ltp" .. i].no_update = true end @@ -125,7 +125,7 @@ function AffineLayer:bind_params() end function AffineLayer:init(batch_size) - if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then + if self.dim_out[1] ~= self.bp.trans:ncol() then nerv.error("mismatching dimensions of linear transform and bias paramter") end for i = 1, #self.dim_in do diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua index 99bf3ca..82824fa 100644 --- a/nerv/layer/lstm_gate.lua +++ b/nerv/layer/lstm_gate.lua @@ -12,17 +12,20 @@ function LSTMGateLayer:bind_params() local lconf = self.lconf lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp for i = 1, #self.dim_in do - self["ltp" .. i] = self:find_param("ltp" .. i, lconf, self.gconf, + local pid = "ltp" .. i + local pid_list = i == 1 and {pid, "ltp"} or pid + self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) if self.param_type[i] == 'D' then self["ltp" .. i].trans:diagonalize() end - local no_update = lconf["no_update_ltp"..i] + local no_update = lconf["no_update_ltp" .. i] if (no_update ~= nil) and no_update or lconf.no_update_all then self["ltp" .. i].no_update = true end end + self.ltp = self.ltp1 -- alias of ltp1 self.bp = self:find_param("bp", lconf, self.gconf, nerv.BiasParam, {1, self.dim_out[1]}, nerv.Param.gen_zero) @@ -33,18 +36,17 @@ function LSTMGateLayer:bind_params() end function LSTMGateLayer:init(batch_size) + if self.dim_out[1] ~= self.bp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform and bias paramter") + end for i = 1, #self.dim_in do - if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then - nerv.error("mismatching dimensions of linear transform and bias paramter") - end if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then nerv.error("mismatching dimensions of linear transform parameter and input") end - self["ltp"..i]:train_init() - end - - if self.dim_out[1] ~= self.ltp1.trans:ncol() then - nerv.error("mismatching dimensions of linear transform parameter and output") + if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then + nerv.error("mismatching dimensions of linear transform parameter and output") + end + self["ltp" .. i]:train_init() end self.bp:train_init() self.err_bakm = self.mat_type(batch_size, self.dim_out[1]) diff --git a/nerv/layer/projection.lua b/nerv/layer/projection.lua index d99401c..9d376bd 100644 --- a/nerv/layer/projection.lua +++ b/nerv/layer/projection.lua @@ -8,12 +8,18 @@ function ProjectionLayer:__init(id, global_conf, layer_conf) end function ProjectionLayer:bind_params() + local lconf = self.lconf + lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp for i = 1, #self.dim_in do local pid = "ltp" .. i local pid_list = i == 1 and {pid, "ltp"} or pid - self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf, + self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) + local no_update = lconf["no_update_ltp" .. i] + if (no_update ~= nil) and no_update or lconf.no_update_all then + self["ltp" .. i].no_update = true + end end self.ltp = self.ltp1 -- alias of ltp1 end -- cgit v1.2.3