aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--nerv/doc/source/overview.rst9
-rw-r--r--nerv/layer/affine.lua4
-rw-r--r--nerv/layer/lstm_gate.lua22
-rw-r--r--nerv/layer/projection.lua8
4 files changed, 26 insertions, 17 deletions
diff --git a/nerv/doc/source/overview.rst b/nerv/doc/source/overview.rst
index 8ff1dd5..5c75bd8 100644
--- a/nerv/doc/source/overview.rst
+++ b/nerv/doc/source/overview.rst
@@ -6,7 +6,7 @@ What is NERV?
NERV is a general-purpose deep learning toolkit designed to be simple,
lightweight, extensible and complete. The name "NERV" comes from the German
-word "nerv" for "nerve" in English. It is also a fictional organization in the
+word "nerv" for "nerve" in English. It is also a fictional organization in
*Neon Genesis Evangelion*, a famous anime.
@@ -18,7 +18,8 @@ and related deep learning techniques have given rise to many tools and toolkits
which are designed for constructing and training neural networks which could
facilitate and routinize the research cycle of deep learning applied to areas
such as speech processing, computer vision, natural language processing and so
-on. Such tools or toolkits can be categorized into two by design: task-specific or general-purpose.
+on. Such tools or toolkits can be categorized into two by design: task-specific
+or general-purpose.
The first category tries to address the deep learning in a direct way. These
tools usually have a very specific goal, which means to support a certain type
@@ -245,7 +246,7 @@ Network
*******
Layers alone only declare the structure and behavior of the intended network.
-To turn a layer into a trainable model, we need to *compile* the layer into an
+To turn a layer into a trainable model, we need to *compile* the layer into a
*network* instance (``nerv.Network``). What compilation does is to flatten out
the nested graph layer according to the specified structure and allocate the
space for those intermediate activations between the layers. Although the
@@ -285,7 +286,7 @@ samples together and cut samples into mini-batches.
Scheduler (Trainer)
*******************
-*Schedulers* refer to those top-level scripts that implements the main training
+*Schedulers* refer to those top-level scripts that implement the main training
loop and tick the training process. A general-purpose scheduler typically
takes in a Lua script written by an end user that contains description of the
network, task-specific processing for reading data and some hyper-parameters.
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 467eac9..16250fd 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -108,7 +108,7 @@ function AffineLayer:bind_params()
self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
- local no_update = lconf["no_update_ltp"..i]
+ local no_update = lconf["no_update_ltp" .. i]
if (no_update ~= nil) and no_update or lconf.no_update_all then
self["ltp" .. i].no_update = true
end
@@ -125,7 +125,7 @@ function AffineLayer:bind_params()
end
function AffineLayer:init(batch_size)
- if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
+ if self.dim_out[1] ~= self.bp.trans:ncol() then
nerv.error("mismatching dimensions of linear transform and bias paramter")
end
for i = 1, #self.dim_in do
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 3bb3cb9..39a3ff7 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -12,17 +12,20 @@ function LSTMGateLayer:bind_params()
local lconf = self.lconf
lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
for i = 1, #self.dim_in do
- self["ltp" .. i] = self:find_param("ltp" .. i, lconf, self.gconf,
+ local pid = "ltp" .. i
+ local pid_list = i == 1 and {pid, "ltp"} or pid
+ self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
if self.param_type[i] == 'D' then
self["ltp" .. i].trans:diagonalize()
end
- local no_update = lconf["no_update_ltp"..i]
+ local no_update = lconf["no_update_ltp" .. i]
if (no_update ~= nil) and no_update or lconf.no_update_all then
self["ltp" .. i].no_update = true
end
end
+ self.ltp = self.ltp1 -- alias of ltp1
self.bp = self:find_param("bp", lconf, self.gconf,
nerv.BiasParam, {1, self.dim_out[1]},
nerv.Param.gen_zero)
@@ -33,18 +36,17 @@ function LSTMGateLayer:bind_params()
end
function LSTMGateLayer:init(batch_size)
+ if self.dim_out[1] ~= self.bp.trans:ncol() then
+ nerv.error("mismatching dimensions of linear transform and bias paramter")
+ end
for i = 1, #self.dim_in do
- if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then
- nerv.error("mismatching dimensions of linear transform and bias paramter")
- end
if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
nerv.error("mismatching dimensions of linear transform parameter and input")
end
- self["ltp"..i]:train_init()
- end
-
- if self.dim_out[1] ~= self.ltp1.trans:ncol() then
- nerv.error("mismatching dimensions of linear transform parameter and output")
+ if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then
+ nerv.error("mismatching dimensions of linear transform parameter and output")
+ end
+ self["ltp" .. i]:train_init()
end
self.bp:train_init()
self.err_bakm = self.mat_type(batch_size, self.dim_out[1])
diff --git a/nerv/layer/projection.lua b/nerv/layer/projection.lua
index c0b5638..077125b 100644
--- a/nerv/layer/projection.lua
+++ b/nerv/layer/projection.lua
@@ -8,12 +8,18 @@ function ProjectionLayer:__init(id, global_conf, layer_conf)
end
function ProjectionLayer:bind_params()
+ local lconf = self.lconf
+ lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
for i = 1, #self.dim_in do
local pid = "ltp" .. i
local pid_list = i == 1 and {pid, "ltp"} or pid
- self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
+ self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
+ local no_update = lconf["no_update_ltp" .. i]
+ if (no_update ~= nil) and no_update or lconf.no_update_all then
+ self["ltp" .. i].no_update = true
+ end
end
self.ltp = self.ltp1 -- alias of ltp1
end