Diffstat (limited to 'nerv/layer')
-rw-r--r-- | nerv/layer/affine.lua     |  43
-rw-r--r-- | nerv/layer/bias.lua       |  15
-rw-r--r-- | nerv/layer/combiner.lua   |  16
-rw-r--r-- | nerv/layer/dropout.lua    |  27
-rw-r--r-- | nerv/layer/duplicate.lua  |  41
-rw-r--r-- | nerv/layer/elem_mul.lua   |  11
-rw-r--r-- | nerv/layer/graph.lua      | 156
-rw-r--r-- | nerv/layer/gru.lua        |  20
-rw-r--r-- | nerv/layer/identity.lua   |  30
-rw-r--r-- | nerv/layer/init.lua       |  72
-rw-r--r-- | nerv/layer/lstm.lua       | 192
-rw-r--r-- | nerv/layer/lstm_gate.lua  |  17
-rw-r--r-- | nerv/layer/mse.lua        |  16
-rw-r--r-- | nerv/layer/rnn.lua        |  42
-rw-r--r-- | nerv/layer/sigmoid.lua    |  17
-rw-r--r-- | nerv/layer/softmax.lua    |  11
-rw-r--r-- | nerv/layer/softmax_ce.lua |  16
-rw-r--r-- | nerv/layer/tanh.lua       |  11
-rw-r--r-- | nerv/layer/window.lua     |  15
19 files changed, 504 insertions, 264 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
 --- A parameter that consists of a single matrix
 -- @type nerv.MatrixParam
 
+function MatrixParam:check(checker)
+    -- check trans matrix type
+    checker(self.trans)
+end
+
 --- Read from a file handle.
 -- @param handle the file handle
 function MatrixParam:read(handle)
     self.trans = self.gconf.mmat_type.load(handle)
-    if not self.gconf.use_cpu then
-        self.trans = self.gconf.cumat_type.new_from_host(self.trans)
-    end
 end
 
 function MatrixParam:write(handle)
-    local trans = self.trans
-    if not self.gconf.use_cpu then
-        trans = self.trans:new_to_host()
-    end
-    trans:save(handle)
+    self.trans:save(handle)
 end
 
 function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
     self.correction:fill(0)
 end
 
+function MatrixParam:copy(copier)
+    local target = nerv.MatrixParam(self.id, self.gconf)
+    target.trans = copier(self.trans)
+    return target
+end
+
 function MatrixParam:_update_by_gradient(gradient, alpha, beta)
     local gconf = self.gconf
     -- momentum gain
@@ -77,25 +81,24 @@ end
 
 --- The constructor.
 function AffineLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
-        layer_conf.ltp1 = layer_conf.ltp
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+    self:bind_params()
+end
+
+function AffineLayer:bind_params()
     for i = 1, #self.dim_in do
         local pid = "ltp" .. i
         local pid_list = i == 1 and {pid, "ltp"} or pid
-        self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
                                            nerv.LinearTransParam,
-                                           {self.dim_in[i], self.dim_out[1]})
+                                           {self.dim_in[i], self.dim_out[1]})
     end
     self.ltp = self.ltp1 -- alias of ltp1
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
                               nerv.BiasParam, {1, self.dim_out[1]})
-    self.gconf = global_conf
-    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
 end
 
 function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function AffineLayer:get_params()
-    local pr = nerv.ParamRepo({self.ltp1, self.bp})
+    local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
     for i = 2, #self.dim_in do
         pr:add(self["ltp" .. i].id, self["ltp" .. i])
     end
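Note on the pattern introduced above and repeated throughout this commit: parameter lookup moves out of the constructor into bind_params(), which reads self.lconf and self.gconf, so a layer can re-resolve its parameters against a different ParamRepo without being rebuilt. A minimal sketch of how an affine layer might be configured under the new scheme; the ids, dimensions and gconf fields are illustrative, not taken from this commit:

    -- hypothetical configuration; dims and ids are made up
    local pr = nerv.ParamRepo({}, nerv.ParamRepo.LOC_TYPES.ON_HOST)
    local affine = nerv.AffineLayer("affine0", gconf,
                                    {dim_in = {429}, dim_out = {2048},
                                     pr = pr,                         -- searched by find_param()
                                     params = {ltp1 = "shared_ltp"}}) -- optional explicit param id
    -- later, rebind the same layer to a trained repo without reconstructing it
    affine.lconf.pr = trained_pr
    affine:bind_params()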
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
 local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
 
 function BiasLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.bias = layer_conf.bias
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function BiasLayer:bind_params()
+    self.bias = self:find_param("bias", self.lconf, self.gconf,
+                                nerv.BiasParam,
+                                {1, self.dim_out[1]})
 end
 
 function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
 end
 
 function BiasLayer:get_params()
-    return nerv.ParamRepo({self.bias})
+    return nerv.ParamRepo({self.bias}, self.loc_type)
 end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
 local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
 
 function CombinerLayer:__init(id, global_conf, layer_conf)
-    self.id = id
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.lambda = layer_conf.lambda
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
     self:check_dim_len(#self.lambda, -1)
     if #self.dim_in < 1 then
         nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
     end
 end
 
+function CombinerLayer:bind_params()
+    -- do nothing
+end
+
 function CombinerLayer:init(batch_size)
     local dim = self.dim_in[1]
     for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function CombinerLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
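The second argument that now accompanies every nerv.ParamRepo constructor call in this commit is a location tag. A short sketch of how that tag is chosen, mirroring the Layer:__init logic added in init.lua later in this diff (gconf is assumed to carry the usual use_cpu flag):

    -- assumed gconf with a use_cpu flag; LOC_TYPES values come from nerv.ParamRepo
    local loc_type = gconf.use_cpu and nerv.ParamRepo.LOC_TYPES.ON_HOST
                                    or nerv.ParamRepo.LOC_TYPES.ON_DEVICE
    local pr = nerv.ParamRepo({}, loc_type)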
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..39a8963 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,17 @@
 local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
 
 function DropoutLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
-    self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
-    if self.rate == nil then
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    if self.gconf.dropout_rate == nil then
         nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
     end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function DropoutLayer:bind_params()
+    -- do nothing
+end
+
 function DropoutLayer:init(batch_size, chunk_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -45,12 +40,12 @@ function DropoutLayer:propagate(input, output, t)
     if t == nil then
         t = 1
     end
-    if self.rate then
+    if self.gconf.dropout_rate ~= 0 then
         self.mask[t]:rand_uniform()
         -- since we will lose a portion of the actvations, we multiply the
         -- activations by 1 / (1 - rate) to compensate
-        self.mask[t]:thres_mask(self.mask[t], self.rate,
-                                0, 1 / (1.0 - self.rate))
+        self.mask[t]:thres_mask(self.mask[t], self.gconf.dropout_rate,
+                                0, 1 / (1.0 - self.gconf.dropout_rate))
         output[1]:mul_elem(input[1], self.mask[t])
     else
         output[1]:copy_fromd(input[1])
@@ -65,7 +60,7 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
     if t == nil then
         t = 1
     end
-    if self.rate then
+    if self.gconf.dropout_rate then
         next_bp_err[1]:mul_elem(bp_err[1], self.mask[t])
     else
         next_bp_err[1]:copy_fromd(bp_err[1])
@@ -73,5 +68,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function DropoutLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
new file mode 100644
index 0000000..137472b
--- /dev/null
+++ b/nerv/layer/duplicate.lua
@@ -0,0 +1,41 @@
+local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
+
+function DuplicateLayer:__init(id, global_conf, layer_conf)
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(1, -1)
+    if #self.dim_out < 1 then
+        nerv.error('no output specified')
+    end
+    for i = 1, #self.dim_out do
+        if self.dim_out[i] ~= self.dim_in[1] then
+            nerv.error('mismatching dimensions of outputs')
+        end
+    end
+end
+
+function DuplicateLayer:init()
+end
+
+function DuplicateLayer:batch_resize()
+end
+
+function DuplicateLayer:propagate(input, output)
+    for i = 1, #self.dim_out do
+        output[i]:copy_from(input[1])
+        -- FIXME: use reference copy to speed up
+    end
+end
+
+function DuplicateLayer:back_propagate(bp_err, next_bp_err)
+    next_bp_err[1]:copy_from(bp_err[1])
+    for i = 2, #self.dim_out do
+        next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0)
+    end
+end
+
+function DuplicateLayer:update()
+end
+
+function DuplicateLayer:get_params()
+    return nerv.ParamRepo({}, self.loc_type)
+end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
 local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
 
 function ElemMulLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     -- element-wise multiplication of input[1] and input[2]
     self:check_dim_len(2, 1)
 end
 
+function ElemMulLayer:bind_params()
+    -- do nothing
+end
+
 function ElemMulLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] or
         self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
 end
 
 function ElemMulLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
new file mode 100644
index 0000000..68d5f51
--- /dev/null
+++ b/nerv/layer/graph.lua
@@ -0,0 +1,156 @@
+local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
+
+function GraphLayer:__init(id, global_conf, layer_conf)
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:graph_init(layer_conf.layer_repo, layer_conf.connections)
+end
+
+local function parse_id(str)
+    local id, port, _
+    _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]")
+    if id == nil or port == nil then
+        _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]")
+        if not (id == "<input>" or id == "<output>") then
+            nerv.error("wrong format of connection id")
+        end
+    end
+    port = tonumber(port)
+    return id, port
+end
+
+function GraphLayer:add_prefix(layers, connections)
+    local function ap(name)
+        return self.id .. '.' .. name
+    end
+
+    for layer_type, sublayers in pairs(layers) do
+        local tmp = {}
+        for name, layer_config in pairs(sublayers) do
+            tmp[ap(name)] = layer_config
+        end
+        layers[layer_type] = tmp
+    end
+
+    for i = 1, #connections do
+        local from, to = connections[i][1], connections[i][2]
+        if parse_id(from) ~= '<input>' then
+            connections[i][1] = ap(from)
+        end
+        if parse_id(to) ~= '<output>' then
+            connections[i][2] = ap(to)
+        end
+    end
+end
+
+function GraphLayer:discover(id, layer_repo)
+    if id == '<output>' then
+        id = '<input>'
+    end
+    local layers = self.layers
+    local ref = layers[id]
+    if ref == nil then
+        local layer = layer_repo:get_layer(id)
+        local dim_in, dim_out = layer:get_dim()
+        self.layer_num = self.layer_num + 1
+        ref = {
+            layer = layer,
+            inputs = {},
+            outputs = {},
+            dim_in = dim_in,
+            dim_out = dim_out,
+            id = self.layer_num,
+        }
+        layers[id] = ref
+    end
+    return ref
+end
+
+function GraphLayer:graph_init(layer_repo, connections)
+    local layers = {}
+    layers['<input>'] = {
+        inputs = {},
+        outputs = {},
+        dim_in = self.dim_out,
+        dim_out = self.dim_in,
+        id = 0,
+    }
+    self.layers = layers
+    self.layer_num = 0
+    self.connections = {}
+
+    -- check data dimension between connected ports
+    for _, edge in pairs(connections) do
+        local from, to, time = edge[1], edge[2], edge[3]
+        local id_from, port_from = parse_id(from)
+        local id_to, port_to = parse_id(to)
+        local ref_from = self:discover(id_from, layer_repo)
+        local ref_to = self:discover(id_to, layer_repo)
+        if ref_from.outputs[port_from] ~= nil then
+            nerv.error('%s has already been attached', from)
+        end
+        if ref_to.inputs[port_to] ~= nil then
+            nerv.error('%s has already been attached', to)
+        end
+        if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then
+            nerv.error('mismatching data dimension between %s and %s', from, to)
+        end
+        if ref_from.id == 0 and ref_to.id == 0 then
+            nerv.error('short-circuit connection between <input> and <output>')
+        end
+        ref_from.outputs[port_from] = true
+        ref_to.inputs[port_to] = true
+        table.insert(self.connections, {ref_from.id, port_from, ref_to.id, port_to, time})
+    end
+
+    -- check dangling ports
+    for id, ref in pairs(layers) do
+        if id ~= '<input>' then
+            for i = 1, #ref.dim_in do
+                if ref.inputs[i] == nil then
+                    nerv.error('dangling input port %d of layer %s', i, id)
+                end
+            end
+            for i = 1, #ref.dim_out do
+                if ref.outputs[i] == nil then
+                    nerv.error('dangling output port %d of layer %s', i, id)
+                end
+            end
+        end
+    end
+    for i = 1, #self.dim_in do
+        if layers['<input>'].outputs[i] == nil then
+            nerv.error('dangling port %d of layer <input>', i)
+        end
+    end
+    for i = 1, #self.dim_out do
+        if layers['<input>'].inputs[i] == nil then
+            nerv.error('dangling port %d of layer <output>', i)
+        end
+    end
+end
+
+function GraphLayer:set_attr(name, value)
+    self[name] = value
+    for id, ref in pairs(self.layers) do
+        if id ~= '<input>' then
+            ref.layer:set_attr(name, value)
+        end
+    end
+end
+
+function GraphLayer:get_sublayer(id)
+    if self.layers[id] == nil or id == '<input>' then
+        nerv.error('layer with id %s not found', id)
+    end
+    return self.layers[id].layer
+end
+
+function GraphLayer:get_params()
+    local param_repos = {}
+    for id, ref in pairs(self.layers) do
+        if id ~= '<input>' then
+            table.insert(param_repos, ref.layer:get_params())
+        end
+    end
+    return nerv.ParamRepo.merge(param_repos, self.loc_type)
+end
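nerv.GraphLayer above is the new generic container: sublayers come from a LayerRepo and edges are triples {from, to, time}, where ports are written "layer_id[port]", "<input>"/"<output>" denote the graph's own ports, and a non-zero time field marks a delayed (recurrent) edge. A small illustrative wiring, with made-up ids and dimensions, in the same configuration style as the rnn.lua and lstm.lua files later in this diff:

    local layers = {
        ['nerv.AffineLayer'] = {
            affine = {dim_in = {100}, dim_out = {200}, pr = pr},
        },
        ['nerv.SigmoidLayer'] = {
            sigmoid = {dim_in = {200}, dim_out = {200}},
        },
    }
    local connections = {
        {'<input>[1]', 'affine[1]', 0},
        {'affine[1]', 'sigmoid[1]', 0},
        {'sigmoid[1]', '<output>[1]', 0},
    }
    -- layer_conf.layer_repo and layer_conf.connections are what GraphLayer:__init expects
    local graph = nerv.GraphLayer('g', gconf,
                                  {dim_in = {100}, dim_out = {200},
                                   layer_repo = nerv.LayerRepo(layers, pr, gconf),
                                   connections = connections})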
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..71718d7 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- input1:x
     -- input2:h
     -- input3:c (h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     if self.dim_in[2] ~= self.dim_out[1] then
         nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                     self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
     -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
     local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
 
     local connections = {
         ["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
     self.dag = nerv.DAGLayer(self.id, global_conf,
                              {dim_in = self.dim_in,
                               dim_out = self.dim_out,
-                              sub_layers = layerRepo,
+                              sub_layers = self.lrepo,
                               connections = connections})
 
     self:check_dim_len(2, 1) -- x, h and h
 end
 
+function GRULayer:bind_params()
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+    self.lrepo:rebind(pr)
+end
+
 function GRULayer:init(batch_size, chunk_size)
     self.dag:init(batch_size, chunk_size)
 end
diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua
new file mode 100644
index 0000000..d56337d
--- /dev/null
+++ b/nerv/layer/identity.lua
@@ -0,0 +1,30 @@
+local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer')
+
+function IdentityLayer:__init(id, global_conf, layer_conf)
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(1, 1)
+    if self.dim_in[1] ~= self.dim_out[1] then
+        nerv.error('mismatching dimensions of input and output')
+    end
+end
+
+function IdentityLayer:init()
+end
+
+function IdentityLayer:batch_resize()
+end
+
+function IdentityLayer:propagate(input, output)
+    output[1]:copy_from(input[1])
+end
+
+function IdentityLayer:back_propagate(bp_err, next_bp_err)
+    next_bp_err[1]:copy_from(bp_err[1])
+end
+
+function IdentityLayer:update()
+end
+
+function IdentityLayer:get_params()
+    return nerv.ParamRepo({}, self.loc_type)
+end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 54f33ae..475ef62 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
 local Layer = nerv.class('nerv.Layer')
 
 function Layer:__init(id, global_conf, layer_conf)
-    nerv.error_method_not_implemented()
+    self.id = id
+    self.gconf = global_conf
+    self.lconf = layer_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+    else
+        self.mat_type = self.gconf.cumat_type
+        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
 end
 
 function Layer:init(batch_size)
@@ -66,34 +77,49 @@ function Layer:get_params()
     nerv.error_method_not_implemented()
 end
 
+function Layer:bind_params()
+    nerv.error_method_not_implemented()
+end
+
 function Layer:get_dim()
     return self.dim_in, self.dim_out
 end
 
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
-    if type(pid_list) == "string" then
-        pid_list = {pid_list}
+function Layer:set_attr(name, value)
+    self[name] = value
+end
+
+function Layer:get_sublayer(id)
+    nerv.error('primitive layer does not have sublayers')
+end
+
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+    if type(plist) == "string" then
+        plist = {plist}
+    end
+    if lconf.params == nil then
+        lconf.params = {}
     end
-    pid_list_str = table.tostring(pid_list)
-    for i, pid in ipairs(pid_list) do
-        if lconf[pid] ~= nil then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
-            return lconf[pid]
+    plist_str = table.tostring(plist)
+    local pid
+    for i, pname in ipairs(plist) do
+        if lconf.params[pname] ~= nil then
+            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+            pid = lconf.params[pname]
         end
-        local pid_g = self.id .. '_' .. pid --global identifier
-        local pr = lconf.pr
-        local p
-        if pr ~= nil and pr:has_param(pid_g) == true then
-            nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
-            p = pr:get_param(pid_g)
-            return p
+        if lconf.pr:has_param(pid) then
+            return lconf.pr:get_param(pid)
         end
     end
-    nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
-                "switch to auto-generate", pid_list_str, self.id)
-    local pid_g = self.id .. '_' .. pid_list[1]
-    p = p_type(pid_g, gconf)
-    p.trans = gconf.cumat_type(unpack(p_dim))
+    pid = self.id .. '_' .. plist[1]
+    if lconf.pr:has_param(pid) then
+        nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+        return lconf.pr:get_param(pid)
+    end
+    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+                "switch to auto-generate", plist_str, self.id)
+    local p = p_type(pid, gconf)
+    p.trans = self.mat_type(unpack(p_dim))
     if type(gconf.param_random) ~= "function" then
         nerv.error("a param generate function is needed")
     end
@@ -101,6 +127,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
     return p
 end
 
+nerv.include('graph.lua')
 nerv.include('affine.lua')
 nerv.include('sigmoid.lua')
 nerv.include('tanh.lua')
@@ -115,6 +142,9 @@ nerv.include('lstm.lua')
 nerv.include('lstm_gate.lua')
 nerv.include('dropout.lua')
 nerv.include('gru.lua')
+nerv.include('rnn.lua')
+nerv.include('duplicate.lua')
+nerv.include('identity.lua')
 
 -- The following lines are for backward compatibility, and will be removed in
 -- the future. The use of these names are deprecated.
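The rewritten Layer:find_param() above resolves a parameter in three steps: an explicit id taken from layer_conf.params, then the conventional id "<layer_id>_<name>" looked up in layer_conf.pr, and finally auto-generation with gconf.param_random on a matrix of the requested shape (allocated via self.mat_type, so it follows gconf.use_cpu). A hedged sketch of a global_conf that satisfies the auto-generation path; the exact generator is an assumption, not part of this commit:

    -- assumed global_conf field; the generator body is illustrative only
    gconf.param_random = function() return (math.random() / 5 - 0.1) end
    -- with that in place, a missing "affine0_ltp1" (hypothetical id) would be
    -- created as a randomly initialised param of the shape passed to find_param()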
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index d8eee71..56f674a 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -1,143 +1,85 @@
-local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.Layer')
+local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.GraphLayer')
 
 function LSTMLayer:__init(id, global_conf, layer_conf)
-    -- input1:x
-    -- input2:h
-    -- input3:c
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(1, 1)
+
+    local din = layer_conf.dim_in[1]
+    local dout = layer_conf.dim_out[1]
 
-    -- prepare a DAGLayer to hold the lstm structure
     local pr = layer_conf.pr
     if pr == nil then
-        pr = nerv.ParamRepo()
-    end
-
-    local function ap(str)
-        return self.id .. '.' .. str
+        pr = nerv.ParamRepo({}, self.loc_type)
     end
 
-    local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
-    local dout1, dout2 = self.dim_out[1], self.dim_out[2]
-    local layers = {
-        ["nerv.CombinerLayer"] = {
-            [ap("inputXDup")] = {{}, {dim_in = {din1},
-                                      dim_out = {din1, din1, din1, din1},
-                                      lambda = {1}}},
-            [ap("inputHDup")] = {{}, {dim_in = {din2},
-                                      dim_out = {din2, din2, din2, din2},
-                                      lambda = {1}}},
-
-            [ap("inputCDup")] = {{}, {dim_in = {din3},
-                                      dim_out = {din3, din3, din3},
-                                      lambda = {1}}},
-
-            [ap("mainCDup")] = {{}, {dim_in = {din3, din3},
-                                     dim_out = {din3, din3, din3},
-                                     lambda = {1, 1}}},
+    local layers = {
+        ['nerv.CombinerLayer'] = {
+            mainCombine = {dim_in = {dout, dout}, dim_out = {dout}, lambda = {1, 1}},
         },
-        ["nerv.AffineLayer"] = {
-            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
-                                        dim_out = {dout1},
-                                        pr = pr}},
+        ['nerv.DuplicateLayer'] = {
+            inputDup = {dim_in = {din}, dim_out = {din, din, din, din}},
+            outputDup = {dim_in = {dout}, dim_out = {dout, dout, dout, dout, dout}},
+            cellDup = {dim_in = {dout}, dim_out = {dout, dout, dout, dout, dout}},
        },
-        ["nerv.TanhLayer"] = {
-            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
-            [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        ['nerv.AffineLayer'] = {
+            mainAffine = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
         },
-        ["nerv.LSTMGateLayer"] = {
-            [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
-                                        dim_out = {din3}, pr = pr,
-                                        param_type = {'N', 'N', 'D'}}},
-            [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
-                                       dim_out = {din3}, pr = pr,
-                                       param_type = {'N', 'N', 'D'}}},
-            [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
-                                        dim_out = {din3}, pr = pr,
-                                        param_type = {'N', 'N', 'D'}}},
-
+        ['nerv.TanhLayer'] = {
+            mainTanh = {dim_in = {dout}, dim_out = {dout}},
+            outputTanh = {dim_in = {dout}, dim_out = {dout}},
         },
-        ["nerv.ElemMulLayer"] = {
-            [ap("inputGMulL")] = {{}, {dim_in = {din3, din3},
-                                       dim_out = {din3}}},
-            [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3},
-                                        dim_out = {din3}}},
-            [ap("outputGMulL")] = {{}, {dim_in = {din3, din3},
-                                        dim_out = {din3}}},
+        ['nerv.LSTMGateLayer'] = {
+            forgetGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+            inputGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+            outputGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+        },
+        ['nerv.ElemMulLayer'] = {
+            inputGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
+            forgetGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
+            outputGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
         },
     }
 
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
-
     local connections = {
-        ["<input>[1]"] = ap("inputXDup[1]"),
-        ["<input>[2]"] = ap("inputHDup[1]"),
-        ["<input>[3]"] = ap("inputCDup[1]"),
-
-        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
-        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
-        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
-
-        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
-        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
-        [ap("inputCDup[1]")] = ap("inputGateL[3]"),
-
-        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
-        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
-        [ap("inputCDup[2]")] = ap("forgetGateL[3]"),
-
-        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
-        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),
-
-        [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
-        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
-
-        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
-        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
-
-        [ap("inputXDup[4]")] = ap("outputGateL[1]"),
-        [ap("inputHDup[4]")] = ap("outputGateL[2]"),
-        [ap("mainCDup[3]")] = ap("outputGateL[3]"),
-
-        [ap("mainCDup[2]")] = "<output>[2]",
-        [ap("mainCDup[1]")] = ap("outputTanhL[1]"),
-
-        [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
-        [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
-
-        [ap("outputGMulL[1]")] = "<output>[1]",
+        -- lstm input
+        {'<input>[1]', 'inputDup[1]', 0},
+
+        -- input gate
+        {'inputDup[1]', 'inputGate[1]', 0},
+        {'outputDup[1]', 'inputGate[2]', 1},
+        {'cellDup[1]', 'inputGate[3]', 1},
+
+        -- forget gate
+        {'inputDup[2]', 'forgetGate[1]', 0},
+        {'outputDup[2]', 'forgetGate[2]', 1},
+        {'cellDup[2]', 'forgetGate[3]', 1},
+
+        -- lstm cell
+        {'forgetGate[1]', 'forgetGateMul[1]', 0},
+        {'cellDup[3]', 'forgetGateMul[2]', 1},
+        {'inputDup[3]', 'mainAffine[1]', 0},
+        {'outputDup[3]', 'mainAffine[2]', 1},
+        {'mainAffine[1]', 'mainTanh[1]', 0},
+        {'inputGate[1]', 'inputGateMul[1]', 0},
+        {'mainTanh[1]', 'inputGateMul[2]', 0},
+        {'inputGateMul[1]', 'mainCombine[1]', 0},
+        {'forgetGateMul[1]', 'mainCombine[2]', 0},
+        {'mainCombine[1]', 'cellDup[1]', 0},
+
+        -- output gate
+        {'inputDup[4]', 'outputGate[1]', 0},
+        {'outputDup[4]', 'outputGate[2]', 1},
+        {'cellDup[4]', 'outputGate[3]', 0},
+
+        -- lstm output
+        {'cellDup[5]', 'outputTanh[1]', 0},
+        {'outputGate[1]', 'outputGateMul[1]', 0},
+        {'outputTanh[1]', 'outputGateMul[2]', 0},
+        {'outputGateMul[1]', 'outputDup[1]', 0},
+        {'outputDup[5]', '<output>[1]', 0},
     }
 
-    self.dag = nerv.DAGLayer(self.id, global_conf,
-                             {dim_in = self.dim_in,
-                              dim_out = self.dim_out,
-                              sub_layers = layerRepo,
-                              connections = connections})
-
-    self:check_dim_len(3, 2) -- x, h, c and h, c
-end
-
-function LSTMLayer:init(batch_size, chunk_size)
-    self.dag:init(batch_size, chunk_size)
-end
-
-function LSTMLayer:batch_resize(batch_size, chunk_size)
-    self.dag:batch_resize(batch_size, chunk_size)
-end
-
-function LSTMLayer:update(bp_err, input, output, t)
-    self.dag:update(bp_err, input, output, t)
-end
-
-function LSTMLayer:propagate(input, output, t)
-    self.dag:propagate(input, output, t)
-end
-
-function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t)
-    self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
-end
-
-function LSTMLayer:get_params()
-    return self.dag:get_params()
+    self:add_prefix(layers, connections)
+    local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
+    self:graph_init(layer_repo, connections)
 end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 8785b4f..e690721 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,24 +2,23 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
 -- NOTE: this is a full matrix gate
 
 function LSTMGateLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.param_type = layer_conf.param_type
-    self.gconf = global_conf
+    self:check_dim_len(-1, 1) --accept multiple inputs
+    self:bind_params()
+end
 
+function LSTMGateLayer:bind_params()
     for i = 1, #self.dim_in do
-        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+        self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
             nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]})
         if self.param_type[i] == 'D' then
             self["ltp" .. i].trans:diagonalize()
         end
     end
-    self.bp = self:find_param("bp", layer_conf, global_conf,
+    self.bp = self:find_param("bp", self.lconf, self.gconf,
         nerv.BiasParam, {1, self.dim_out[1]})
-
-    self:check_dim_len(-1, 1) --accept multiple inputs
 end
 
 function LSTMGateLayer:init(batch_size)
@@ -76,7 +75,7 @@ function LSTMGateLayer:update(bp_err, input, output)
 end
 
 function LSTMGateLayer:get_params()
-    local pr = nerv.ParamRepo({self.bp})
+    local pr = nerv.ParamRepo({self.bp}, self.loc_type)
     for i = 1, #self.dim_in do
         pr:add(self["ltp" .. i].id, self["ltp" .. i])
     end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
 local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
 
 function MSELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(2, -1)
 end
 
+function MSELayer:bind_params()
+    -- do nothing
+end
+
 function MSELayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_in[2] then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function MSELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
new file mode 100644
index 0000000..0b5ccaa
--- /dev/null
+++ b/nerv/layer/rnn.lua
@@ -0,0 +1,42 @@
+local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
+
+function RNNLayer:__init(id, global_conf, layer_conf)
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+    self:check_dim_len(1, 1)
+
+    if layer_conf.activation == nil then
+        layer_conf.activation = 'nerv.SigmoidLayer'
+    end
+
+    local din = layer_conf.dim_in[1]
+    local dout = layer_conf.dim_out[1]
+
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo({}, self.loc_type)
+    end
+
+    local layers = {
+        ['nerv.AffineLayer'] = {
+            main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
+        },
+        [layer_conf.activation] = {
+            activation = {dim_in = {dout}, dim_out = {dout}},
+        },
+        ['nerv.DuplicateLayer'] = {
+            duplicate = {dim_in = {dout}, dim_out = {dout, dout}},
+        },
+    }
+
+    local connections = {
+        {'<input>[1]', 'main[1]', 0},
+        {'main[1]', 'activation[1]', 0},
+        {'activation[1]', 'duplicate[1]', 0},
+        {'duplicate[1]', 'main[2]', 1},
+        {'duplicate[2]', '<output>[1]', 0},
+    }
+
+    self:add_prefix(layers, connections)
+    local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
+    self:graph_init(layer_repo, connections)
+end
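The new nerv.RNNLayer follows the same pattern: a single recurrent edge ({'duplicate[1]', 'main[2]', 1}) feeds the activation back into the affine layer at the next time step, and the activation class is configurable, defaulting to 'nerv.SigmoidLayer'. A hypothetical instantiation; dimensions are made up and 'nerv.TanhLayer' stands in for any 1-in/1-out layer class:

    local rnn = nerv.RNNLayer('rnn1', gconf,
                              {dim_in = {100}, dim_out = {300},
                               activation = 'nerv.TanhLayer',
                               pr = pr})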
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..5974ffc 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,19 +1,20 @@
 local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
 
 function SigmoidLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
-end
-
-function SigmoidLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
     end
 end
 
+function SigmoidLayer:bind_params()
+    -- do nothing
+end
+
+function SigmoidLayer:init()
+end
+
 function SigmoidLayer:batch_resize(batch_size)
     -- do nothing
 end
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SigmoidLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
 local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
 
 function SoftmaxLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1) -- two inputs: nn output and label
 end
 
+function SoftmaxLayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxLayer:init(batch_size)
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function SoftmaxLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
 local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
 
 function SoftmaxCELayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    if self.gconf.use_cpu then
-        self.mat_type = self.gconf.mmat_type
-    else
-        self.mat_type = self.gconf.cumat_type
-    end
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.compressed = layer_conf.compressed
     if self.compressed == nil then
         self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
     self:check_dim_len(2, -1) -- two inputs: nn output and label
 end
 
+function SoftmaxCELayer:bind_params()
+    -- do nothing
+end
+
 function SoftmaxCELayer:init(batch_size, chunk_size)
     if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
         nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
 end
 
 function SoftmaxCELayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
 local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
 
 function TanhLayer:__init(id, global_conf, layer_conf)
-    self.id = id
-    self.gconf = global_conf
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(1, 1)
 end
 
+function TanhLayer:bind_params()
+    -- do nothing
+end
+
 function TanhLayer:init()
     if self.dim_in[1] ~= self.dim_out[1] then
         nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
 end
 
 function TanhLayer:get_params()
-    return nerv.ParamRepo({})
+    return nerv.ParamRepo({}, self.loc_type)
 end
nerv.class("nerv.WindowLayer", "nerv.Layer") function WindowLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.window = layer_conf.window - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + self:bind_params() +end + +function WindowLayer:bind_params() + self.window = self:find_param("window", self.lconf, self.gconf, + nerv.BiasParam, + {1, self.dim_out[1]}) end function WindowLayer:init() @@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output) end function WindowLayer:get_params() - return nerv.ParamRepo({self.window}) + return nerv.ParamRepo({self.window}, self.loc_type) end |