aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDeterminant <[email protected]>2016-05-05 22:09:17 +0800
committerDeterminant <[email protected]>2016-05-05 22:09:17 +0800
commit1c7e2c3da330e91c504a8d210290305f4a553af7 (patch)
tree3f5eb78204145dd4558fda34916bd0bd5e50431f
parent40da326da485cae4e0a72a96db1fb0a3c90757e4 (diff)
make `bind_params` and `init` consistent in `affine.lua`,
`lstm_gate.lua`, `projection.lua`
-rw-r--r--nerv/doc/source/overview.rst9
-rw-r--r--nerv/layer/affine.lua4
-rw-r--r--nerv/layer/lstm_gate.lua22
-rw-r--r--nerv/layer/projection.lua8
4 files changed, 26 insertions, 17 deletions
diff --git a/nerv/doc/source/overview.rst b/nerv/doc/source/overview.rst
index 8ff1dd5..5c75bd8 100644
--- a/nerv/doc/source/overview.rst
+++ b/nerv/doc/source/overview.rst
@@ -6,7 +6,7 @@ What is NERV?
NERV is a general-purpose deep learning toolkit designed to be simple,
lightweight, extensible and complete. The name "NERV" comes from the German
-word "nerv" for "nerve" in English. It is also a fictional organization in the
+word "nerv" for "nerve" in English. It is also a fictional organization in
*Neon Genesis Evangelion*, a famous anime.
@@ -18,7 +18,8 @@ and related deep learning techniques have given rise to many tools and toolkits
which are designed for constructing and training neural networks which could
facilitate and routinize the research cycle of deep learning applied to areas
such as speech processing, computer vision, natural language processing and so
-on. Such tools or toolkits can be categorized into two by design: task-specific or general-purpose.
+on. Such tools or toolkits can be categorized into two by design: task-specific
+or general-purpose.
The first category tries to address the deep learning in a direct way. These
tools usually have a very specific goal, which means to support a certain type
@@ -245,7 +246,7 @@ Network
*******
Layers alone only declare the structure and behavior of the intended network.
-To turn a layer into a trainable model, we need to *compile* the layer into an
+To turn a layer into a trainable model, we need to *compile* the layer into a
*network* instance (``nerv.Network``). What compilation does is to flatten out
the nested graph layer according to the specified structure and allocate the
space for those intermediate activations between the layers. Although the
@@ -285,7 +286,7 @@ samples together and cut samples into mini-batches.
Scheduler (Trainer)
*******************
-*Schedulers* refer to those top-level scripts that implements the main training
+*Schedulers* refer to those top-level scripts that implement the main training
loop and tick the training process. A general-purpose scheduler typically
takes in a Lua script written by an end user that contains a description of the
network, task-specific processing for reading data and some hyper-parameters.
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 3bf5a11..ace4a78 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -108,7 +108,7 @@ function AffineLayer:bind_params()
self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
- local no_update = lconf["no_update_ltp"..i]
+ local no_update = lconf["no_update_ltp" .. i]
if (no_update ~= nil) and no_update or lconf.no_update_all then
self["ltp" .. i].no_update = true
end
@@ -125,7 +125,7 @@ function AffineLayer:bind_params()
end
function AffineLayer:init(batch_size)
- if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
+ if self.dim_out[1] ~= self.bp.trans:ncol() then
nerv.error("mismatching dimensions of linear transform and bias paramter")
end
for i = 1, #self.dim_in do
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 99bf3ca..82824fa 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -12,17 +12,20 @@ function LSTMGateLayer:bind_params()
local lconf = self.lconf
lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
for i = 1, #self.dim_in do
- self["ltp" .. i] = self:find_param("ltp" .. i, lconf, self.gconf,
+ local pid = "ltp" .. i
+ local pid_list = i == 1 and {pid, "ltp"} or pid
+ self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
if self.param_type[i] == 'D' then
self["ltp" .. i].trans:diagonalize()
end
- local no_update = lconf["no_update_ltp"..i]
+ local no_update = lconf["no_update_ltp" .. i]
if (no_update ~= nil) and no_update or lconf.no_update_all then
self["ltp" .. i].no_update = true
end
end
+ self.ltp = self.ltp1 -- alias of ltp1
self.bp = self:find_param("bp", lconf, self.gconf,
nerv.BiasParam, {1, self.dim_out[1]},
nerv.Param.gen_zero)
@@ -33,18 +36,17 @@ function LSTMGateLayer:bind_params()
end
function LSTMGateLayer:init(batch_size)
+ if self.dim_out[1] ~= self.bp.trans:ncol() then
+ nerv.error("mismatching dimensions of linear transform and bias paramter")
+ end
for i = 1, #self.dim_in do
- if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then
- nerv.error("mismatching dimensions of linear transform and bias paramter")
- end
if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
nerv.error("mismatching dimensions of linear transform parameter and input")
end
- self["ltp"..i]:train_init()
- end
-
- if self.dim_out[1] ~= self.ltp1.trans:ncol() then
- nerv.error("mismatching dimensions of linear transform parameter and output")
+ if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then
+ nerv.error("mismatching dimensions of linear transform parameter and output")
+ end
+ self["ltp" .. i]:train_init()
end
self.bp:train_init()
self.err_bakm = self.mat_type(batch_size, self.dim_out[1])
diff --git a/nerv/layer/projection.lua b/nerv/layer/projection.lua
index d99401c..9d376bd 100644
--- a/nerv/layer/projection.lua
+++ b/nerv/layer/projection.lua
@@ -8,12 +8,18 @@ function ProjectionLayer:__init(id, global_conf, layer_conf)
end
function ProjectionLayer:bind_params()
+ local lconf = self.lconf
+ lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
for i = 1, #self.dim_in do
local pid = "ltp" .. i
local pid_list = i == 1 and {pid, "ltp"} or pid
- self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
+ self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
+ local no_update = lconf["no_update_ltp" .. i]
+ if (no_update ~= nil) and no_update or lconf.no_update_all then
+ self["ltp" .. i].no_update = true
+ end
end
self.ltp = self.ltp1 -- alias of ltp1
end