Diffstat (limited to 'nerv/layer')
-rw-r--r-- | nerv/layer/affine.lua     | 43
-rw-r--r-- | nerv/layer/bias.lua       | 15
-rw-r--r-- | nerv/layer/combiner.lua   | 16
-rw-r--r-- | nerv/layer/dropout.lua    | 16
-rw-r--r-- | nerv/layer/duplicate.lua  |  7
-rw-r--r-- | nerv/layer/elem_mul.lua   | 11
-rw-r--r-- | nerv/layer/graph.lua      |  7
-rw-r--r-- | nerv/layer/gru.lua        | 20
-rw-r--r-- | nerv/layer/identity.lua   |  7
-rw-r--r-- | nerv/layer/init.lua       | 60
-rw-r--r-- | nerv/layer/lstm.lua       | 20
-rw-r--r-- | nerv/layer/lstm_gate.lua  | 17
-rw-r--r-- | nerv/layer/mse.lua        | 16
-rw-r--r-- | nerv/layer/rnn.lua        |  7
-rw-r--r-- | nerv/layer/sigmoid.lua    | 11
-rw-r--r-- | nerv/layer/softmax.lua    | 11
-rw-r--r-- | nerv/layer/softmax_ce.lua | 16
-rw-r--r-- | nerv/layer/tanh.lua       | 11
-rw-r--r-- | nerv/layer/window.lua     | 15
19 files changed, 168 insertions, 158 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
 --- A parameter that consists of a single matrix
 -- @type nerv.MatrixParam
 
+function MatrixParam:check(checker)
+    -- check trans matrix type
+    checker(self.trans)
+end
+
 --- Read from a file handle.
 -- @param handle the file handle
 function MatrixParam:read(handle)
     self.trans = self.gconf.mmat_type.load(handle)
-    if not self.gconf.use_cpu then
-        self.trans = self.gconf.cumat_type.new_from_host(self.trans)
-    end
 end
 
 function MatrixParam:write(handle)
-    local trans = self.trans
-    if not self.gconf.use_cpu then
-        trans = self.trans:new_to_host()
-    end
-    trans:save(handle)
+    self.trans:save(handle)
 end
 
 function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
     self.correction:fill(0)
 end
 
+function MatrixParam:copy(copier)
+    local target = nerv.MatrixParam(self.id, self.gconf)
+    target.trans = copier(self.trans)
+    return target
+end
+
 function MatrixParam:_update_by_gradient(gradient, alpha, beta)
     local gconf = self.gconf
     -- momentum gain
@@ -77,25 +81,24 @@ end
 
 --- The constructor.
 function AffineLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
-        layer_conf.ltp1 = layer_conf.ltp
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+    self:bind_params()
+end
+
+function AffineLayer:bind_params()
     for i = 1, #self.dim_in do
         local pid = "ltp" .. i
         local pid_list = i == 1 and {pid, "ltp"} or pid
-        self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
                                            nerv.LinearTransParam,
-                                            {self.dim_in[i], self.dim_out[1]})
+                                           {self.dim_in[i], self.dim_out[1]})
     end
     self.ltp = self.ltp1 -- alias of ltp1
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                               nerv.BiasParam, {1, self.dim_out[1]})
-    self.gconf = global_conf
-    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
 end
 
 function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function AffineLayer:get_params()
-    local pr = nerv.ParamRepo({self.ltp1, self.bp})
+    local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
     for i = 2, #self.dim_in do
         pr:add(self["ltp" .. i])
     end
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
 local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
 
 function BiasLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.bias = layer_conf.bias
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function BiasLayer:bind_params()
+    self.bias = self:find_param("bias", self.lconf, self.gconf,
+                                nerv.BiasParam,
+                                {1, self.dim_out[1]})
 end
 
 function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
 end
 
 function BiasLayer:get_params()
-    return nerv.ParamRepo({self.bias})
+    return nerv.ParamRepo({self.bias}, self.loc_type)
 end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
 local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
 
 function CombinerLayer:__init(id, global_conf, layer_conf)
-    self.id = id
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.lambda = layer_conf.lambda
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
     self:check_dim_len(#self.lambda, -1)
     if #self.dim_in < 1 then
         nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
     end
 end
 
+function CombinerLayer:bind_params()
+    -- do nothing
+end
+
 function CombinerLayer:init(batch_size)
     local dim = self.dim_in[1]
     for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function CombinerLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..1a379c9 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,18 @@
 local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
 
 function DropoutLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
     if self.rate == nil then
         nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
     end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function DropoutLayer:bind_params()
+    -- do nothing
+end
+
 function DropoutLayer:init(batch_size, chunk_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function DropoutLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
index 8988617..137472b 100644
--- a/nerv/layer/duplicate.lua
+++ b/nerv/layer/duplicate.lua
@@ -1,10 +1,7 @@
 local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
 
 function DuplicateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, -1)
     if #self.dim_out < 1 then
         nerv.error('no output specified')
@@ -40,5 +37,5 @@ function DuplicateLayer:update()
 end
 
 function DuplicateLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
 local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
 
 function ElemMulLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- element-wise multiplication of input[1] and input[2]
     self:check_dim_len(2, 1)
 end
 
+function ElemMulLayer:bind_params()
+    -- do nothing
+end
+
 function ElemMulLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] or
         self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
 end
 
 function ElemMulLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 1406eff..5f42fca 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -1,10 +1,7 @@
 local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
 
 function GraphLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:graph_init(layer_conf.layer_repo, layer_conf.connections)
 end
 
@@ -155,5 +152,5 @@ function GraphLayer:get_params()
             table.insert(param_repos, ref.layer:get_params())
         end
     end
-    return nerv.ParamRepo.merge(param_repos)
+    return nerv.ParamRepo.merge(param_repos, self.loc_type)
 end
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..71718d7 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c (h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     if self.dim_in[2] ~= self.dim_out[1] then
         nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                     self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
             {dim_in = self.dim_in,
             dim_out = self.dim_out,
-            sub_layers = layerRepo,
+            sub_layers = self.lrepo,
             connections = connections})
 
     self:check_dim_len(2, 1) -- x, h and h
 end
 
+function GRULayer:bind_params()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+    self.lrepo:rebind(pr)
+end
+
 function GRULayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua
index aeeff89..d56337d 100644
--- a/nerv/layer/identity.lua
+++ b/nerv/layer/identity.lua
@@ -1,10 +1,7 @@
 local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer')
 
 function IdentityLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error('mismatching dimensions of input and output')
@@ -29,5 +26,5 @@ function IdentityLayer:update()
 end
 
 function IdentityLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 4fabefa..475ef62 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
 local Layer = nerv.class('nerv.Layer')
 
 function Layer:__init(id, global_conf, layer_conf)
-    nerv.error_method_not_implemented()
+    self.id = id
+    self.gconf = global_conf
+    self.lconf = layer_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+    else
+        self.mat_type = self.gconf.cumat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
 end
 
 function Layer:init(batch_size)
@@ -66,6 +77,10 @@ function Layer:get_params()
     nerv.error_method_not_implemented()
 end
 
+function Layer:bind_params()
+    nerv.error_method_not_implemented()
+end
+
 function Layer:get_dim()
     return self.dim_in, self.dim_out
 end
@@ -78,30 +93,33 @@ function Layer:get_sublayer(id)
     nerv.error('primitive layer does not have sublayers')
 end
 
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
-    if type(pid_list) == "string" then
-        pid_list = {pid_list}
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+    if type(plist) == "string" then
+        plist = {plist}
     end
-    pid_list_str = table.tostring(pid_list)
-    for i, pid in ipairs(pid_list) do
-        if lconf[pid] ~= nil then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
-            return lconf[pid]
+    if lconf.params == nil then
+        lconf.params = {}
+    end
+    plist_str = table.tostring(plist)
+    local pid
+    for i, pname in ipairs(plist) do
+        if lconf.params[pname] ~= nil then
+            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+            pid = lconf.params[pname]
         end
-        local pid_g = self.id .. '_' .. pid --global identifier
-        local pr = lconf.pr
-        local p
-        if pr ~= nil and pr:has_param(pid_g) == true then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
-            p = pr:get_param(pid_g)
-            return p
+        if lconf.pr:has_param(pid) then
+            return lconf.pr:get_param(pid)
         end
     end
-    nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
-                "switch to auto-generate", pid_list_str, self.id)
-    local pid_g = self.id .. '_' .. pid_list[1]
-    p = p_type(pid_g, gconf)
-    p.trans = gconf.cumat_type(unpack(p_dim))
+    pid = self.id .. '_' .. plist[1]
+    if lconf.pr:has_param(pid) then
+        nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+        return lconf.pr:get_param(pid)
+    end
+    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+                "switch to auto-generate", plist_str, self.id)
+    local p = p_type(pid, gconf)
+    p.trans = self.mat_type(unpack(p_dim))
     if type(gconf.param_random) ~= "function" then
         nerv.error("a param generate function is needed")
     end
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index caa7569..641d5dc 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local function ap(str)
@@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
             {dim_in = self.dim_in,
             dim_out = self.dim_out,
-            sub_layers = layerRepo,
+            sub_layers = self.lrepo,
             connections = connections})
 
     self:check_dim_len(3, 2) -- x, h, c and h, c
 end
 
+function LSTMLayer:bind_params()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+    self.lrepo:rebind(pr)
+end
+
 function LSTMLayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 1963eba..7a27bab 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
 -- NOTE: this is a full matrix gate
 
 function LSTMGateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) --accept multiple inputs
+    self:bind_params()
+end
 
+function LSTMGateLayer:bind_params()
     for i = 1, #self.dim_in do
-        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
                                            nerv.LinearTransParam,
                                            {self.dim_in[i], self.dim_out[1]})
     end
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                               nerv.BiasParam, {1, self.dim_out[1]})
-
-    self:check_dim_len(-1, 1) --accept multiple inputs
 end
 
 function LSTMGateLayer:init(batch_size)
@@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output)
 end
 
 function LSTMGateLayer:get_params()
-    local pr = nerv.ParamRepo({self.bp})
+    local pr = nerv.ParamRepo({self.bp}, self.loc_type)
     for i = 1, #self.dim_in do
         pr:add(self["ltp" .. i])
     end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
 local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
 
 function MSELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(2, -1)
 end
 
+function MSELayer:bind_params()
+    -- do nothing
+end
+
 function MSELayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function MSELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 38f2326..e59cf5b 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -1,10 +1,7 @@
 local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
 
 function RNNLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = layer_conf.gconf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 
     local din = layer_conf.dim_in[1]
@@ -12,7 +9,7 @@ function RNNLayer:__init(id, global_conf, layer_conf)
 
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local layers = {
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..a9f9749 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,13 +1,14 @@
 local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
 
 function SigmoidLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function SigmoidLayer:bind_params()
+    -- do nothing
+end
+
 function SigmoidLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SigmoidLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
 local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
 
 function SoftmaxLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function SoftmaxLayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SoftmaxLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
 local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
 
 function SoftmaxCELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.compressed = layer_conf.compressed
     if self.compressed == nil then
         self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
     self:check_dim_len(2, -1) -- two inputs: nn output and label
 end
 
+function SoftmaxCELayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxCELayer:init(batch_size, chunk_size)
     if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function SoftmaxCELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
 local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
 
 function TanhLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function TanhLayer:bind_params()
+    -- do nothing
+end
+
 function TanhLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function TanhLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 4933de0..364929f 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -1,12 +1,15 @@
 local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer")
 
 function WindowLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.window = layer_conf.window
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function WindowLayer:bind_params()
+    self.window = self:find_param("window", self.lconf, self.gconf,
+                                  nerv.BiasParam,
+                                  {1, self.dim_out[1]})
 end
 
 function WindowLayer:init()
@@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output)
 end
 
 function WindowLayer:get_params()
-    return nerv.ParamRepo({self.window})
+    return nerv.ParamRepo({self.window}, self.loc_type)
 end
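The hunks above move the per-layer constructor boilerplate (id, gconf, lconf, dim_in, dim_out, mat_type, loc_type) into nerv.Layer.__init and split parameter lookup into a separate bind_params method, so that a layer's parameters can later be rebound to a different ParamRepo without reconstructing the layer. A minimal sketch of a layer written against the refactored API follows; the ScaleLayer class and its single "scale" parameter are hypothetical and used only for illustration, while the base-class calls are the ones introduced in this change.

local ScaleLayer = nerv.class("nerv.ScaleLayer", "nerv.Layer")

function ScaleLayer:__init(id, global_conf, layer_conf)
    -- the base __init now fills in id, gconf, lconf, dim_in, dim_out,
    -- mat_type and loc_type (ON_HOST or ON_DEVICE)
    nerv.Layer.__init(self, id, global_conf, layer_conf)
    self:check_dim_len(1, 1)
    self:bind_params()
end

function ScaleLayer:bind_params()
    -- find_param resolves the parameter through layer_conf.params and
    -- layer_conf.pr, falling back to auto-generation via gconf.param_random
    self.scale = self:find_param("scale", self.lconf, self.gconf,
                                 nerv.BiasParam, {1, self.dim_out[1]})
end

function ScaleLayer:get_params()
    -- the repo now records whether its params live on host or device
    return nerv.ParamRepo({self.scale}, self.loc_type)
end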