diff options
Diffstat (limited to 'nerv/layer')
-rw-r--r-- | nerv/layer/affine.lua | 63 | ||||
-rw-r--r-- | nerv/layer/bias.lua | 2 | ||||
-rw-r--r-- | nerv/layer/combiner.lua | 13 | ||||
-rw-r--r-- | nerv/layer/init.lua | 35 | ||||
-rw-r--r-- | nerv/layer/mse.lua | 17 | ||||
-rw-r--r-- | nerv/layer/softmax_ce.lua | 11 | ||||
-rw-r--r-- | nerv/layer/window.lua | 2 |
7 files changed, 94 insertions, 49 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 566e9bc..4156dde 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -1,15 +1,28 @@ +--- Parameter and layer classes related to linear transform. + local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param') local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam') local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam') local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') +--- A parameter that consists of a single matrix +-- @type nerv.MatrixParam + +--- Read from a file handle. +-- @param handle the file handle function MatrixParam:read(handle) - self.trans = self.gconf.cumat_type.new_from_host( - self.gconf.mmat_type.load(handle)) + self.trans = self.gconf.mmat_type.load(handle) + if not self.gconf.use_cpu then + self.trans = self.gconf.cumat_type.new_from_host(self.trans) + end end function MatrixParam:write(handle) - self.trans:new_to_host():save(handle) + local trans = self.trans + if not self.gconf.use_cpu then + trans = self.trans:new_to_host() + end + trans:save(handle) end function MatrixParam:train_init() @@ -59,15 +72,28 @@ function LinearTransParam:update_by_err_input(err, input) self:_update_by_err_input(err, input, l2, l2) end +--- A fully-connected linear transform layer. +-- @type nerv.AffineLayer + +--- The constructor. function AffineLayer:__init(id, global_conf, layer_conf) self.id = id self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out - self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[1], self.dim_out[1]}) --layer_conf.ltp - for i = 2, #self.dim_in do - self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) + if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then + layer_conf.ltp1 = layer_conf.ltp + end + for i = 1, #self.dim_in do + local pid = "ltp" .. i + local pid_list = i == 1 and {pid, "ltp"} or pid + self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf, + nerv.LinearTransParam, + {self.dim_in[i], self.dim_out[1]}) end - self.bp = self:find_param("bp", layer_conf, global_conf, nerv.BiasParam, {1, self.dim_out[1]}) --layer_conf.bp + self.ltp = self.ltp1 -- alias of ltp1 + self.bp = self:find_param("bp", layer_conf, global_conf, + nerv.BiasParam, + {1, self.dim_out[1]}) self.gconf = global_conf self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs end @@ -76,15 +102,7 @@ function AffineLayer:init(batch_size) if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then nerv.error("mismatching dimensions of linear transform and bias paramter") end - self.bp:train_init() - if self.dim_in[1] ~= self.ltp.trans:nrow() then - nerv.error("mismatching dimensions of linear transform parameter and input") - end - if self.dim_out[1] ~= self.ltp.trans:ncol() then - nerv.error("mismatching dimensions of linear transform parameter and output") - end - self.ltp:train_init() - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then nerv.error("mismatching dimensions of linear transform parameter and input") end @@ -93,6 +111,7 @@ function AffineLayer:init(batch_size) end self["ltp" .. i]:train_init() end + self.bp:train_init() end function AffineLayer:batch_resize(batch_size) @@ -100,30 +119,30 @@ function AffineLayer:batch_resize(batch_size) end function AffineLayer:update(bp_err, input, output) - self.ltp:update_by_err_input(bp_err[1], input[1]) - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do self["ltp" .. i]:update_by_err_input(bp_err[1], input[i]) end self.bp:update_by_gradient(bp_err[1]:colsum()) end function AffineLayer:propagate(input, output) - output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N') + -- apply linear transform + output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N') for i = 2, #self.dim_in do output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N') end + -- add bias output[1]:add_row(self.bp.trans, 1.0) end function AffineLayer:back_propagate(bp_err, next_bp_err, input, output) - next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T') - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do next_bp_err[i]:mul(bp_err[1], self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T') end end function AffineLayer:get_params() - local pr = nerv.ParamRepo({self.ltp, self.bp}) + local pr = nerv.ParamRepo({self.ltp1, self.bp}) for i = 2, #self.dim_in do pr:add(self["ltp" .. i].id, self["ltp" .. i]) end diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index 7e9fd46..924c3da 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -23,7 +23,7 @@ function BiasLayer:batch_resize(batch_size) end function BiasLayer:propagate(input, output) - output[1]:copy_fromd(input[1]) + output[1]:copy_from(input[1]) output[1]:add_row(self.bias.trans, 1.0) end diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua index 1bcfdfb..22e89a9 100644 --- a/nerv/layer/combiner.lua +++ b/nerv/layer/combiner.lua @@ -6,6 +6,11 @@ function CombinerLayer:__init(id, global_conf, layer_conf) self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self:check_dim_len(#self.lambda, -1) if #self.dim_in < 1 then nerv.error("no input specified") @@ -27,12 +32,12 @@ function CombinerLayer:init(batch_size) nerv.error("mismatching dimensions of inputs/outputs") end end - self.sum = self.gconf.cumat_type(batch_size, dim) + self.sum = self.mat_type(batch_size, dim) end function CombinerLayer:batch_resize(batch_size) if self.sum:nrow() ~= batch_size then - self.sum = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.sum = self.mat_type(batch_size, self.dim_in[1]) end end @@ -45,13 +50,13 @@ function CombinerLayer:propagate(input, output) output[1]:add(output[1], input[i], 1.0, self.lambda[i]) end for i = 2, #self.dim_out do - output[i]:copy_fromd(output[1]) + output[i]:copy_from(output[1]) end end function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output) local sum = self.sum - sum:copy_fromd(bp_err[1]) + sum:copy_from(bp_err[1]) for i = 2, #self.dim_out do sum:add(sum, bp_err[i], 1.0, 1.0) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 23606e1..43c2250 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -70,22 +70,33 @@ function Layer:get_dim() return self.dim_in, self.dim_out end -function Layer:find_param(pid, l_conf, gconf, p_type, p_dim) - if l_conf[pid] ~= nil then - nerv.info("Param [%s] of layer [%s] found in layer_conf.", pid, self.id) - return l_conf[pid] +function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) + if type(pid_list) == "string" then + pid_list = {pid_list} end - local pid_g = self.id .. '_' .. pid --global identifier - local pr = l_conf.pr - local p - if pr ~= nil and pr:has_param(pid_g) == true then - nerv.info("Param [%s] of layer [%s] found in layer_conf.paramRepo.", pid, self.id) - p = pr:get_param(pid_g) - return p + pid_list_str = table.tostring(pid_list) + for i, pid in ipairs(pid_list) do + if lconf[pid] ~= nil then + nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id) + return lconf[pid] + end + local pid_g = self.id .. '_' .. pid --global identifier + local pr = lconf.pr + local p + if pr ~= nil and pr:has_param(pid_g) == true then + nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id) + p = pr:get_param(pid_g) + return p + end end - nerv.info("Param [%s] of layer [%s] is not found in layer_conf or layer_conf.paramRepo, switch to auto-generate.", pid, self.id) + nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " .. + "switch to auto-generate.", pid_list_str, self.id) + local pid_g = self.id .. '_' .. pid_list[1] p = p_type(pid_g, gconf) p.trans = gconf.cumat_type(unpack(p_dim)) + if type(gconf.param_random) ~= "function" then + nerv.error("a param generate function is needed") + end p.trans:generate(gconf.param_random) return p end diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 0ee3080..1c218d0 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -5,6 +5,11 @@ function MSELayer:__init(id, global_conf, layer_conf) self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self:check_dim_len(2, -1) end @@ -15,15 +20,15 @@ function MSELayer:init(batch_size) self.scale = 1 / self.dim_in[1] self.total_mse = 0.0 self.total_frames = 0 - self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.mse = self.mat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.mat_type(batch_size, 1) self.diff = self.mse:create() end function MSELayer:batch_resize(batch_size) if self.mse:nrow() ~= batch_resize then - self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.mse = self.mat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.mat_type(batch_size, 1) self.diff = self.mse:create() end end @@ -36,11 +41,11 @@ function MSELayer:propagate(input, output) local mse = self.mse local mse_sum = self.mse_sum mse:add(input[1], input[2], 1.0, -1.0) - self.diff:copy_fromd(mse) + self.diff:copy_from(mse) mse:mul_elem(mse, mse) mse_sum:add(mse_sum, mse:rowsum(mse), 0.0, self.scale) if output[1] ~= nil then - output[1]:copy_fromd(mse_sum) + output[1]:copy_from(mse_sum) end self.total_mse = self.total_mse + mse_sum:colsum()[0][0] self.total_frames = self.total_frames + mse_sum:nrow() diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua index 9071e86..31a2ad7 100644 --- a/nerv/layer/softmax_ce.lua +++ b/nerv/layer/softmax_ce.lua @@ -3,6 +3,11 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") function SoftmaxCELayer:__init(id, global_conf, layer_conf) self.id = id self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.compressed = layer_conf.compressed @@ -19,13 +24,13 @@ function SoftmaxCELayer:init(batch_size) self.total_ce = 0.0 self.total_correct = 0 self.total_frames = 0 - self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.softmax = self.mat_type(batch_size, self.dim_in[1]) self.ce = self.softmax:create() end function SoftmaxCELayer:batch_resize(batch_size) if self.softmax:nrow() ~= batch_resize then - self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.softmax = self.mat_type(batch_size, self.dim_in[1]) self.ce = self.softmax:create() end end @@ -46,7 +51,7 @@ function SoftmaxCELayer:propagate(input, output) ce:mul_elem(ce, label) ce = ce:rowsum() if output[1] ~= nil then - output[1]:copy_fromd(ce) + output[1]:copy_from(ce) end -- add total ce self.total_ce = self.total_ce - ce:colsum()[0][0] diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 8eed352..4933de0 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -23,7 +23,7 @@ function WindowLayer:batch_resize(batch_size) end function WindowLayer:propagate(input, output) - output[1]:copy_fromd(input[1]) + output[1]:copy_from(input[1]) output[1]:scale_rows_by_row(self.window.trans) end |