about | summary | refs | log | tree | commit | diff
path: root/nerv/layer
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/layer')
-rw-r--r-- nerv/layer/affine.lua | 63
-rw-r--r-- nerv/layer/bias.lua | 2
-rw-r--r-- nerv/layer/combiner.lua | 13
-rw-r--r-- nerv/layer/init.lua | 35
-rw-r--r-- nerv/layer/mse.lua | 17
-rw-r--r-- nerv/layer/softmax_ce.lua | 11
-rw-r--r-- nerv/layer/window.lua | 2
7 files changed, 94 insertions, 49 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 566e9bc..4156dde 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -1,15 +1,28 @@
+--- Parameter and layer classes related to linear transform.
+
local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param')
local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam')
local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam')
local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
+--- A parameter that consists of a single matrix
+-- @type nerv.MatrixParam
+
+--- Read from a file handle.
+-- @param handle the file handle
function MatrixParam:read(handle)
- self.trans = self.gconf.cumat_type.new_from_host(
- self.gconf.mmat_type.load(handle))
+ self.trans = self.gconf.mmat_type.load(handle)
+ if not self.gconf.use_cpu then
+ self.trans = self.gconf.cumat_type.new_from_host(self.trans)
+ end
end
function MatrixParam:write(handle)
- self.trans:new_to_host():save(handle)
+ local trans = self.trans
+ if not self.gconf.use_cpu then
+ trans = self.trans:new_to_host()
+ end
+ trans:save(handle)
end
function MatrixParam:train_init()
@@ -59,15 +72,28 @@ function LinearTransParam:update_by_err_input(err, input)
self:_update_by_err_input(err, input, l2, l2)
end
+--- A fully-connected linear transform layer.
+-- @type nerv.AffineLayer
+
+--- The constructor.
function AffineLayer:__init(id, global_conf, layer_conf)
self.id = id
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
- self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[1], self.dim_out[1]}) --layer_conf.ltp
- for i = 2, #self.dim_in do
- self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]})
+ if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
+ layer_conf.ltp1 = layer_conf.ltp
+ end
+ for i = 1, #self.dim_in do
+ local pid = "ltp" .. i
+ local pid_list = i == 1 and {pid, "ltp"} or pid
+ self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+ nerv.LinearTransParam,
+ {self.dim_in[i], self.dim_out[1]})
end
- self.bp = self:find_param("bp", layer_conf, global_conf, nerv.BiasParam, {1, self.dim_out[1]}) --layer_conf.bp
+ self.ltp = self.ltp1 -- alias of ltp1
+ self.bp = self:find_param("bp", layer_conf, global_conf,
+ nerv.BiasParam,
+ {1, self.dim_out[1]})
self.gconf = global_conf
self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
end
@@ -76,15 +102,7 @@ function AffineLayer:init(batch_size)
if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
nerv.error("mismatching dimensions of linear transform and bias paramter")
end
- self.bp:train_init()
- if self.dim_in[1] ~= self.ltp.trans:nrow() then
- nerv.error("mismatching dimensions of linear transform parameter and input")
- end
- if self.dim_out[1] ~= self.ltp.trans:ncol() then
- nerv.error("mismatching dimensions of linear transform parameter and output")
- end
- self.ltp:train_init()
- for i = 2, #self.dim_in do
+ for i = 1, #self.dim_in do
if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
nerv.error("mismatching dimensions of linear transform parameter and input")
end
@@ -93,6 +111,7 @@ function AffineLayer:init(batch_size)
end
self["ltp" .. i]:train_init()
end
+ self.bp:train_init()
end
function AffineLayer:batch_resize(batch_size)
@@ -100,30 +119,30 @@ function AffineLayer:batch_resize(batch_size)
end
function AffineLayer:update(bp_err, input, output)
- self.ltp:update_by_err_input(bp_err[1], input[1])
- for i = 2, #self.dim_in do
+ for i = 1, #self.dim_in do
self["ltp" .. i]:update_by_err_input(bp_err[1], input[i])
end
self.bp:update_by_gradient(bp_err[1]:colsum())
end
function AffineLayer:propagate(input, output)
- output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
+ -- apply linear transform
+ output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
for i = 2, #self.dim_in do
output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N')
end
+ -- add bias
output[1]:add_row(self.bp.trans, 1.0)
end
function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
- next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
- for i = 2, #self.dim_in do
+ for i = 1, #self.dim_in do
next_bp_err[i]:mul(bp_err[1], self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
end
end
function AffineLayer:get_params()
- local pr = nerv.ParamRepo({self.ltp, self.bp})
+ local pr = nerv.ParamRepo({self.ltp1, self.bp})
for i = 2, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 7e9fd46..924c3da 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -23,7 +23,7 @@ function BiasLayer:batch_resize(batch_size)
end
function BiasLayer:propagate(input, output)
- output[1]:copy_fromd(input[1])
+ output[1]:copy_from(input[1])
output[1]:add_row(self.bias.trans, 1.0)
end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 1bcfdfb..22e89a9 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -6,6 +6,11 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
self.gconf = global_conf
+ if self.gconf.use_cpu then
+ self.mat_type = self.gconf.mmat_type
+ else
+ self.mat_type = self.gconf.cumat_type
+ end
self:check_dim_len(#self.lambda, -1)
if #self.dim_in < 1 then
nerv.error("no input specified")
@@ -27,12 +32,12 @@ function CombinerLayer:init(batch_size)
nerv.error("mismatching dimensions of inputs/outputs")
end
end
- self.sum = self.gconf.cumat_type(batch_size, dim)
+ self.sum = self.mat_type(batch_size, dim)
end
function CombinerLayer:batch_resize(batch_size)
if self.sum:nrow() ~= batch_size then
- self.sum = self.gconf.cumat_type(batch_size, self.dim_in[1])
+ self.sum = self.mat_type(batch_size, self.dim_in[1])
end
end
@@ -45,13 +50,13 @@ function CombinerLayer:propagate(input, output)
output[1]:add(output[1], input[i], 1.0, self.lambda[i])
end
for i = 2, #self.dim_out do
- output[i]:copy_fromd(output[1])
+ output[i]:copy_from(output[1])
end
end
function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
local sum = self.sum
- sum:copy_fromd(bp_err[1])
+ sum:copy_from(bp_err[1])
for i = 2, #self.dim_out do
sum:add(sum, bp_err[i], 1.0, 1.0)
end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 23606e1..43c2250 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -70,22 +70,33 @@ function Layer:get_dim()
return self.dim_in, self.dim_out
end
-function Layer:find_param(pid, l_conf, gconf, p_type, p_dim)
- if l_conf[pid] ~= nil then
- nerv.info("Param [%s] of layer [%s] found in layer_conf.", pid, self.id)
- return l_conf[pid]
+function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
+ if type(pid_list) == "string" then
+ pid_list = {pid_list}
end
- local pid_g = self.id .. '_' .. pid --global identifier
- local pr = l_conf.pr
- local p
- if pr ~= nil and pr:has_param(pid_g) == true then
- nerv.info("Param [%s] of layer [%s] found in layer_conf.paramRepo.", pid, self.id)
- p = pr:get_param(pid_g)
- return p
+ pid_list_str = table.tostring(pid_list)
+ for i, pid in ipairs(pid_list) do
+ if lconf[pid] ~= nil then
+ nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
+ return lconf[pid]
+ end
+ local pid_g = self.id .. '_' .. pid --global identifier
+ local pr = lconf.pr
+ local p
+ if pr ~= nil and pr:has_param(pid_g) == true then
+ nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
+ p = pr:get_param(pid_g)
+ return p
+ end
end
- nerv.info("Param [%s] of layer [%s] is not found in layer_conf or layer_conf.paramRepo, switch to auto-generate.", pid, self.id)
+ nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
+ "switch to auto-generate.", pid_list_str, self.id)
+ local pid_g = self.id .. '_' .. pid_list[1]
p = p_type(pid_g, gconf)
p.trans = gconf.cumat_type(unpack(p_dim))
+ if type(gconf.param_random) ~= "function" then
+ nerv.error("a param generate function is needed")
+ end
p.trans:generate(gconf.param_random)
return p
end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 0ee3080..1c218d0 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -5,6 +5,11 @@ function MSELayer:__init(id, global_conf, layer_conf)
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
self.gconf = global_conf
+ if self.gconf.use_cpu then
+ self.mat_type = self.gconf.mmat_type
+ else
+ self.mat_type = self.gconf.cumat_type
+ end
self:check_dim_len(2, -1)
end
@@ -15,15 +20,15 @@ function MSELayer:init(batch_size)
self.scale = 1 / self.dim_in[1]
self.total_mse = 0.0
self.total_frames = 0
- self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1])
- self.mse_sum = self.gconf.cumat_type(batch_size, 1)
+ self.mse = self.mat_type(batch_size, self.dim_in[1])
+ self.mse_sum = self.mat_type(batch_size, 1)
self.diff = self.mse:create()
end
function MSELayer:batch_resize(batch_size)
if self.mse:nrow() ~= batch_resize then
- self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1])
- self.mse_sum = self.gconf.cumat_type(batch_size, 1)
+ self.mse = self.mat_type(batch_size, self.dim_in[1])
+ self.mse_sum = self.mat_type(batch_size, 1)
self.diff = self.mse:create()
end
end
@@ -36,11 +41,11 @@ function MSELayer:propagate(input, output)
local mse = self.mse
local mse_sum = self.mse_sum
mse:add(input[1], input[2], 1.0, -1.0)
- self.diff:copy_fromd(mse)
+ self.diff:copy_from(mse)
mse:mul_elem(mse, mse)
mse_sum:add(mse_sum, mse:rowsum(mse), 0.0, self.scale)
if output[1] ~= nil then
- output[1]:copy_fromd(mse_sum)
+ output[1]:copy_from(mse_sum)
end
self.total_mse = self.total_mse + mse_sum:colsum()[0][0]
self.total_frames = self.total_frames + mse_sum:nrow()
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index 9071e86..31a2ad7 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -3,6 +3,11 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
function SoftmaxCELayer:__init(id, global_conf, layer_conf)
self.id = id
self.gconf = global_conf
+ if self.gconf.use_cpu then
+ self.mat_type = self.gconf.mmat_type
+ else
+ self.mat_type = self.gconf.cumat_type
+ end
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
self.compressed = layer_conf.compressed
@@ -19,13 +24,13 @@ function SoftmaxCELayer:init(batch_size)
self.total_ce = 0.0
self.total_correct = 0
self.total_frames = 0
- self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1])
+ self.softmax = self.mat_type(batch_size, self.dim_in[1])
self.ce = self.softmax:create()
end
function SoftmaxCELayer:batch_resize(batch_size)
if self.softmax:nrow() ~= batch_resize then
- self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1])
+ self.softmax = self.mat_type(batch_size, self.dim_in[1])
self.ce = self.softmax:create()
end
end
@@ -46,7 +51,7 @@ function SoftmaxCELayer:propagate(input, output)
ce:mul_elem(ce, label)
ce = ce:rowsum()
if output[1] ~= nil then
- output[1]:copy_fromd(ce)
+ output[1]:copy_from(ce)
end
-- add total ce
self.total_ce = self.total_ce - ce:colsum()[0][0]
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 8eed352..4933de0 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -23,7 +23,7 @@ function WindowLayer:batch_resize(batch_size)
end
function WindowLayer:propagate(input, output)
- output[1]:copy_fromd(input[1])
+ output[1]:copy_from(input[1])
output[1]:scale_rows_by_row(self.window.trans)
end