-rw-r--r-- | nerv/Makefile                     |   6
-rw-r--r-- | nerv/examples/asr_trainer.lua     |  22
-rw-r--r-- | nerv/examples/timit_baseline2.lua |  60
-rw-r--r-- | nerv/init.lua                     |  10
-rw-r--r-- | nerv/layer/duplicate.lua          |  41
-rw-r--r-- | nerv/layer/graph.lua              | 156
-rw-r--r-- | nerv/layer/gru.lua                |   4
-rw-r--r-- | nerv/layer/identity.lua           |  30
-rw-r--r-- | nerv/layer/init.lua               |  12
-rw-r--r-- | nerv/layer/lstm.lua               |  52
-rw-r--r-- | nerv/layer/rnn.lua                |  38
-rw-r--r-- | nerv/layer/sigmoid.lua            |   6
-rw-r--r-- | nerv/nn/init.lua                  |   2
-rw-r--r-- | nerv/nn/layer_dag.lua             | 352
-rw-r--r-- | nerv/nn/network.lua               | 500
-rw-r--r-- | nerv/tnn/init.lua                 |  47
-rw-r--r-- | nerv/tnn/sutil.lua                |  80
-rw-r--r-- | nerv/tnn/tnn.lua                  | 596
18 files changed, 869 insertions, 1145 deletions
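The headline change is the removal of nerv.DAGLayer and the tnn/ modules in favour of the new nerv.GraphLayer and nerv.Network classes. The following is a minimal sketch of the new connection format, with hypothetical layer names and dimensions; gconf and param_repo are assumed to exist as in the example scripts changed below. Edge triples {from, to, time} replace the old ["from"] = "to" map, with the third element giving the time offset.

-- Minimal sketch of the new GraphLayer API (hypothetical layer names and
-- dimensions; `gconf` and `param_repo` are assumed to exist as in the
-- example scripts below).
local layers = {
    ['nerv.AffineLayer'] = {
        affine = {dim_in = {440}, dim_out = {1959}, pr = param_repo},
    },
    ['nerv.SigmoidLayer'] = {
        sigmoid = {dim_in = {1959}, dim_out = {1959}},
    },
}
local layer_repo = nerv.LayerRepo(layers, param_repo, gconf)

local graph = nerv.GraphLayer('g', gconf, {
    dim_in = {440}, dim_out = {1959},
    layer_repo = layer_repo,
    connections = {
        -- each edge is {from, to, time}; time 0 means a same-frame edge,
        -- time 1 marks a recurrent edge (see nerv/layer/rnn.lua below)
        {'<input>[1]', 'affine[1]', 0},
        {'affine[1]', 'sigmoid[1]', 0},
        {'sigmoid[1]', '<output>[1]', 0},
    },
})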
diff --git a/nerv/Makefile b/nerv/Makefile index e8bcad2..7921bd9 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -42,9 +42,9 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ - io/sgd_buffer.lua \ - tnn/init.lua tnn/sutil.lua tnn/tnn.lua + layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ + nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/network.lua \ + io/sgd_buffer.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK CUDA_INCLUDE := -I $(CUDA_BASE)/include/ diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 5bf28bd..6bdf57c 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -20,6 +20,12 @@ local function build_trainer(ifname) local network = get_network(layer_repo) local global_transf = get_global_transf(layer_repo) local input_order = get_input_order() + + network = nerv.Network("nt", gconf, {network = network}) + network:init(gconf.batch_size, 1) + global_transf = nerv.Network("gt", gconf, {network = global_transf}) + global_transf:init(gconf.batch_size, 1) + local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo) -- rebind the params if necessary if rebind_param_repo then @@ -32,10 +38,11 @@ local function build_trainer(ifname) -- build buffer local buffer = make_buffer(make_readers(scp_file, layer_repo)) -- initialize the network - network:init(gconf.batch_size) gconf.cnt = 0 err_input = {mat_type(gconf.batch_size, 1)} err_input[1]:fill(1) + network:epoch_init() + global_transf:epoch_init() for data in buffer.get_data, buffer do -- prine stat periodically gconf.cnt = gconf.cnt + 1 @@ -69,10 +76,17 @@ local function build_trainer(ifname) for i = 1, #input do table.insert(err_output, input[i]:create()) end - network:propagate(input, output) + network:mini_batch_init({seq_length = table.vector(gconf.batch_size, 1), + new_seq = {}, + do_train = bp, + input = {input}, + output = {output}, + err_input = {err_input}, + err_output = {err_output}}) + network:propagate() if bp then - network:back_propagate(err_input, err_output, input, output) - network:update(err_input, input, output) + network:back_propagate() + network:update() end -- collect garbage in-time to save GPU memory collectgarbage("collect") diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua index 2d144b5..d783c3d 100644 --- a/nerv/examples/timit_baseline2.lua +++ b/nerv/examples/timit_baseline2.lua @@ -61,35 +61,35 @@ function make_layer_repo(param_repo) layer_repo:add_layers( { - ["nerv.DAGLayer"] = + ["nerv.GraphLayer"] = { global_transf = { dim_in = {440}, dim_out = {440}, - sub_layers = layer_repo, + layer_repo = layer_repo, connections = { - ["<input>[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "<output>[1]" + {"<input>[1]", "blayer1[1]", 0}, + {"blayer1[1]", "wlayer1[1]", 0}, + {"wlayer1[1]", "<output>[1]", 0} } }, main = { dim_in = {440}, dim_out = {1959}, - sub_layers = layer_repo, + layer_repo = layer_repo, connections = { - ["<input>[1]"] = "affine0[1]", - ["affine0[1]"] = "sigmoid0[1]", - ["sigmoid0[1]"] = "affine1[1]", - ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "affine2[1]", - ["affine2[1]"] = "sigmoid2[1]", - 
["sigmoid2[1]"] = "affine3[1]", - ["affine3[1]"] = "sigmoid3[1]", - ["sigmoid3[1]"] = "affine4[1]", - ["affine4[1]"] = "sigmoid4[1]", - ["sigmoid4[1]"] = "affine5[1]", - ["affine5[1]"] = "sigmoid5[1]", - ["sigmoid5[1]"] = "affine6[1]", - ["affine6[1]"] = "<output>[1]" + {"<input>[1]", "affine0[1]", 0}, + {"affine0[1]", "sigmoid0[1]", 0}, + {"sigmoid0[1]", "affine1[1]", 0}, + {"affine1[1]", "sigmoid1[1]", 0}, + {"sigmoid1[1]", "affine2[1]", 0}, + {"affine2[1]", "sigmoid2[1]", 0}, + {"sigmoid2[1]", "affine3[1]", 0}, + {"affine3[1]", "sigmoid3[1]", 0}, + {"sigmoid3[1]", "affine4[1]", 0}, + {"affine4[1]", "sigmoid4[1]", 0}, + {"sigmoid4[1]", "affine5[1]", 0}, + {"affine5[1]", "sigmoid5[1]", 0}, + {"sigmoid5[1]", "affine6[1]", 0}, + {"affine6[1]", "<output>[1]", 0} } } } @@ -97,25 +97,25 @@ function make_layer_repo(param_repo) layer_repo:add_layers( { - ["nerv.DAGLayer"] = + ["nerv.GraphLayer"] = { ce_output = { dim_in = {440, 1}, dim_out = {1}, - sub_layers = layer_repo, + layer_repo = layer_repo, connections = { - ["<input>[1]"] = "main[1]", - ["main[1]"] = "ce_crit[1]", - ["<input>[2]"] = "ce_crit[2]", - ["ce_crit[1]"] = "<output>[1]" + {"<input>[1]", "main[1]", 0}, + {"main[1]", "ce_crit[1]", 0}, + {"<input>[2]", "ce_crit[2]", 0}, + {"ce_crit[1]", "<output>[1]", 0} } }, softmax_output = { dim_in = {440}, dim_out = {1959}, - sub_layers = layer_repo, + layer_repo = layer_repo, connections = { - ["<input>[1]"] = "main[1]", - ["main[1]"] = "softmax[1]", - ["softmax[1]"] = "<output>[1]" + {"<input>[1]", "main[1]", 0}, + {"main[1]", "softmax[1]", 0}, + {"softmax[1]", "<output>[1]", 0} } } } diff --git a/nerv/init.lua b/nerv/init.lua index da7df29..ff944b8 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -347,10 +347,18 @@ function table.extend(tbl1, tbl2) end end +function table.vector(len, fill) + local v = {} + fill = fill or 0 + for i = 1, len do + table.insert(v, fill) + end + return v +end + -- the following lines trigger the initialization of basic modules nerv.include('matrix/init.lua') nerv.include('io/init.lua') nerv.include('layer/init.lua') nerv.include('nn/init.lua') -nerv.include('tnn/init.lua') diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua new file mode 100644 index 0000000..137472b --- /dev/null +++ b/nerv/layer/duplicate.lua @@ -0,0 +1,41 @@ +local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer') + +function DuplicateLayer:__init(id, global_conf, layer_conf) + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(1, -1) + if #self.dim_out < 1 then + nerv.error('no output specified') + end + for i = 1, #self.dim_out do + if self.dim_out[i] ~= self.dim_in[1] then + nerv.error('mismatching dimensions of outputs') + end + end +end + +function DuplicateLayer:init() +end + +function DuplicateLayer:batch_resize() +end + +function DuplicateLayer:propagate(input, output) + for i = 1, #self.dim_out do + output[i]:copy_from(input[1]) + -- FIXME: use reference copy to speed up + end +end + +function DuplicateLayer:back_propagate(bp_err, next_bp_err) + next_bp_err[1]:copy_from(bp_err[1]) + for i = 2, #self.dim_out do + next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0) + end +end + +function DuplicateLayer:update() +end + +function DuplicateLayer:get_params() + return nerv.ParamRepo({}, self.loc_type) +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua new file mode 100644 index 0000000..5f42fca --- /dev/null +++ b/nerv/layer/graph.lua @@ -0,0 +1,156 @@ +local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') + 
+function GraphLayer:__init(id, global_conf, layer_conf) + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:graph_init(layer_conf.layer_repo, layer_conf.connections) +end + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "<input>" or id == "<output>") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +function GraphLayer:add_prefix(layers, connections) + local function ap(name) + return self.id .. '.' .. name + end + + for layer_type, sublayers in pairs(layers) do + local tmp = {} + for name, layer_config in pairs(sublayers) do + tmp[ap(name)] = layer_config + end + layers[layer_type] = tmp + end + + for i = 1, #connections do + local from, to = connections[i][1], connections[i][2] + if parse_id(from) ~= '<input>' then + connections[i][1] = ap(from) + end + if parse_id(to) ~= '<output>' then + connections[i][2] = ap(to) + end + end +end + +function GraphLayer:discover(id, layer_repo) + if id == '<output>' then + id = '<input>' + end + local layers = self.layers + local ref = layers[id] + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + self.layer_num = self.layer_num + 1 + ref = { + layer = layer, + inputs = {}, + outputs = {}, + dim_in = dim_in, + dim_out = dim_out, + id = self.layer_num, + } + layers[id] = ref + end + return ref +end + +function GraphLayer:graph_init(layer_repo, connections) + local layers = {} + layers['<input>'] = { + inputs = {}, + outputs = {}, + dim_in = self.dim_out, + dim_out = self.dim_in, + id = 0, + } + self.layers = layers + self.layer_num = 0 + self.connections = {} + + -- check data dimension between connected ports + for _, edge in pairs(connections) do + local from, to, time = edge[1], edge[2], edge[3] + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = self:discover(id_from, layer_repo) + local ref_to = self:discover(id_to, layer_repo) + if ref_from.outputs[port_from] ~= nil then + nerv.error('%s has already been attached', from) + end + if ref_to.inputs[port_to] ~= nil then + nerv.error('%s has already been attached', to) + end + if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then + nerv.error('mismatching data dimension between %s and %s', from, to) + end + if ref_from.id == 0 and ref_to.id == 0 then + nerv.error('short-circuit connection between <input> and <output>') + end + ref_from.outputs[port_from] = true + ref_to.inputs[port_to] = true + table.insert(self.connections, {ref_from.id, port_from, ref_to.id, port_to, time}) + end + + -- check dangling ports + for id, ref in pairs(layers) do + if id ~= '<input>' then + for i = 1, #ref.dim_in do + if ref.inputs[i] == nil then + nerv.error('dangling input port %d of layer %s', i, id) + end + end + for i = 1, #ref.dim_out do + if ref.outputs[i] == nil then + nerv.error('dangling output port %d os layer %s', i, id) + end + end + end + end + for i = 1, #self.dim_in do + if layers['<input>'].outputs[i] == nil then + nerv.error('dangling port %d of layer <input>', i) + end + end + for i = 1, #self.dim_out do + if layers['<input>'].inputs[i] == nil then + nerv.error('dangling port %d of layer <output>', i) + end + end +end + +function GraphLayer:set_attr(name, value) + self[name] = value + for id, ref in pairs(self.layers) do + if id ~= '<input>' then + 
ref.layer:set_attr(name, value) + end + end +end + +function GraphLayer:get_sublayer(id) + if self.layers[id] == nil or id == '<input>' then + nerv.error('layer with id %s not found', id) + end + return self.layers[id].layer +end + +function GraphLayer:get_params() + local param_repos = {} + for id, ref in pairs(self.layers) do + if id ~= '<input>' then + table.insert(param_repos, ref.layer:get_params()) + end + end + return nerv.ParamRepo.merge(param_repos, self.loc_type) +end diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua index a590a67..71718d7 100644 --- a/nerv/layer/gru.lua +++ b/nerv/layer/gru.lua @@ -13,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo(nil, self.loc_type) + pr = nerv.ParamRepo({}, self.loc_type) end local function ap(str) @@ -102,7 +102,7 @@ end function GRULayer:bind_params() local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo(nil, self.loc_type) + pr = nerv.ParamRepo({}, self.loc_type) end self.lrepo:rebind(pr) end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua new file mode 100644 index 0000000..d56337d --- /dev/null +++ b/nerv/layer/identity.lua @@ -0,0 +1,30 @@ +local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer') + +function IdentityLayer:__init(id, global_conf, layer_conf) + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(1, 1) + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error('mismatching dimensions of input and output') + end +end + +function IdentityLayer:init() +end + +function IdentityLayer:batch_resize() +end + +function IdentityLayer:propagate(input, output) + output[1]:copy_from(input[1]) +end + +function IdentityLayer:back_propagate(bp_err, next_bp_err) + next_bp_err[1]:copy_from(bp_err[1]) +end + +function IdentityLayer:update() +end + +function IdentityLayer:get_params() + return nerv.ParamRepo({}, self.loc_type) +end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 146ad8c..475ef62 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -85,6 +85,14 @@ function Layer:get_dim() return self.dim_in, self.dim_out end +function Layer:set_attr(name, value) + self[name] = value +end + +function Layer:get_sublayer(id) + nerv.error('primitive layer does not have sublayers') +end + function Layer:find_param(plist, lconf, gconf, p_type, p_dim) if type(plist) == "string" then plist = {plist} @@ -119,6 +127,7 @@ function Layer:find_param(plist, lconf, gconf, p_type, p_dim) return p end +nerv.include('graph.lua') nerv.include('affine.lua') nerv.include('sigmoid.lua') nerv.include('tanh.lua') @@ -133,6 +142,9 @@ nerv.include('lstm.lua') nerv.include('lstm_gate.lua') nerv.include('dropout.lua') nerv.include('gru.lua') +nerv.include('rnn.lua') +nerv.include('duplicate.lua') +nerv.include('identity.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua index d4c9212..641d5dc 100644 --- a/nerv/layer/lstm.lua +++ b/nerv/layer/lstm.lua @@ -8,7 +8,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo(nil, self.loc_type) + pr = nerv.ParamRepo({}, self.loc_type) end local function ap(str) @@ -18,47 +18,47 @@ function LSTMLayer:__init(id, global_conf, layer_conf) local dout1, dout2, dout3 = self.dim_out[1], self.dim_out[2], self.dim_out[3] local layers = { ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {dim_in = {din1}, + [ap("inputXDup")] = {dim_in = {din1}, dim_out = {din1, din1, din1, din1}, - lambda = {1}}}, + lambda = {1}}, - [ap("inputHDup")] = {{}, {dim_in = {din2}, + [ap("inputHDup")] = {dim_in = {din2}, dim_out = {din2, din2, din2, din2}, - lambda = {1}}}, + lambda = {1}}, - [ap("inputCDup")] = {{}, {dim_in = {din3}, + [ap("inputCDup")] = {dim_in = {din3}, dim_out = {din3, din3, din3}, - lambda = {1}}}, + lambda = {1}}, - [ap("mainCDup")] = {{}, {dim_in = {din3, din3}, + [ap("mainCDup")] = {dim_in = {din3, din3}, dim_out = {din3, din3, din3}, - lambda = {1, 1}}}, + lambda = {1, 1}}, }, ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {dim_in = {din1, din2}, + [ap("mainAffineL")] = {dim_in = {din1, din2}, dim_out = {dout1}, - pr = pr}}, + pr = pr}, }, ["nerv.TanhLayer"] = { - [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}}, - [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}}, + [ap("mainTanhL")] = {dim_in = {dout1}, dim_out = {dout1}}, + [ap("outputTanhL")] = {dim_in = {dout1}, dim_out = {dout1}}, }, ["nerv.LSTMGateLayer"] = { - [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, - [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, - [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, + [ap("forgetGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, + [ap("inputGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, + [ap("outputGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, }, ["nerv.ElemMulLayer"] = { - [ap("inputGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, - [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, - [ap("outputGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, + [ap("inputGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, + [ap("forgetGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, + [ap("outputGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, }, } @@ -114,7 +114,7 @@ end function LSTMLayer:bind_params() local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo(nil, self.loc_type) + pr = nerv.ParamRepo({}, self.loc_type) end self.lrepo:rebind(pr) end diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua new file mode 100644 index 0000000..e59cf5b --- /dev/null +++ b/nerv/layer/rnn.lua @@ -0,0 +1,38 @@ +local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer') + +function RNNLayer:__init(id, global_conf, layer_conf) + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(1, 1) + + local din = layer_conf.dim_in[1] + local dout = layer_conf.dim_out[1] + + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo({}, self.loc_type) + end + + local layers = { + ['nerv.AffineLayer'] = { + main = {dim_in = {din, dout}, dim_out = {dout}, pr = 
pr}, + }, + ['nerv.SigmoidLayer'] = { + sigmoid = {dim_in = {dout}, dim_out = {dout}}, + }, + ['nerv.DuplicateLayer'] = { + dup = {dim_in = {dout}, dim_out = {dout, dout}}, + } + } + + local connections = { + {'<input>[1]', 'main[1]', 0}, + {'main[1]', 'sigmoid[1]', 0}, + {'sigmoid[1]', 'dup[1]', 0}, + {'dup[1]', 'main[2]', 1}, + {'dup[2]', '<output>[1]', 0}, + } + + self:add_prefix(layers, connections) + local layer_repo = nerv.LayerRepo(layers, pr, global_conf) + self:graph_init(layer_repo, connections) +end diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua index a9f9749..5974ffc 100644 --- a/nerv/layer/sigmoid.lua +++ b/nerv/layer/sigmoid.lua @@ -3,6 +3,9 @@ local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer") function SigmoidLayer:__init(id, global_conf, layer_conf) nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error("mismatching dimensions of input and output") + end end function SigmoidLayer:bind_params() @@ -10,9 +13,6 @@ function SigmoidLayer:bind_params() end function SigmoidLayer:init() - if self.dim_in[1] ~= self.dim_out[1] then - nerv.error("mismatching dimensions of input and output") - end end function SigmoidLayer:batch_resize(batch_size) diff --git a/nerv/nn/init.lua b/nerv/nn/init.lua index cbaf52b..1037d05 100644 --- a/nerv/nn/init.lua +++ b/nerv/nn/init.lua @@ -1,3 +1,3 @@ nerv.include('layer_repo.lua') nerv.include('param_repo.lua') -nerv.include('layer_dag.lua') +nerv.include('network.lua') diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua deleted file mode 100644 index f999752..0000000 --- a/nerv/nn/layer_dag.lua +++ /dev/null @@ -1,352 +0,0 @@ -local DAGLayer = nerv.class("nerv.DAGLayer", "nerv.Layer") - -local function parse_id(str) - local id, port, _ - _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]") - if id == nil or port == nil then - _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") - if not (id == "<input>" or id == "<output>") then - nerv.error("wrong format of connection id") - end - end - port = tonumber(port) - return id, port -end - -local function discover(id, layers, layer_repo) - local ref = layers[id] - if id == "<input>" or id == "<output>" then - return nil - end - if ref == nil then - local layer = layer_repo:get_layer(id) - local dim_in, dim_out = layer:get_dim() - ref = { - layer = layer, - inputs = {}, - outputs = {}, - err_inputs = {}, - err_outputs = {}, - next_layers = {}, - input_len = #dim_in, - output_len = #dim_out, - in_deg = 0, - visited = false - } - layers[id] = ref - end - return ref -end - -local function touch_list_by_idx(list, idx) - if list[idx] == nil then - list[idx] = {} - end -end - -function DAGLayer:__init(id, global_conf, layer_conf) - local layers = {} - local inputs = {} - local outputs = {} - local dim_in = layer_conf.dim_in - local dim_out = layer_conf.dim_out - local parsed_conn = {} - for from, to in pairs(layer_conf.connections) do - local id_from, port_from = parse_id(from) - local id_to, port_to = parse_id(to) - local ref_from = discover(id_from, layers, layer_conf.sub_layers) - local ref_to = discover(id_to, layers, layer_conf.sub_layers) - local input_dim, output_dim, _ - if ref_from then - touch_list_by_idx(ref_from.outputs, 1) - if ref_from.outputs[1][port_from] ~= nil then - nerv.error("%s has already been attached", from) - end - end - if ref_to then - touch_list_by_idx(ref_to.inputs, 1) - if ref_to.inputs[1][port_to] ~= nil then - nerv.error("%s has already been 
attached", to) - end - end - if id_from == "<input>" then - input_dim, _ = ref_to.layer:get_dim() - if dim_in[port_from] ~= input_dim[port_to] then - nerv.error("mismatching data dimension between %s and %s", from, to) - end - inputs[port_from] = {ref_to, port_to} - ref_to.inputs[1][port_to] = inputs -- just a place holder - elseif id_to == "<output>" then - _, output_dim = ref_from.layer:get_dim() - if output_dim[port_from] ~= dim_out[port_to] then - nerv.error("mismatching data dimension between %s and %s", from, to) - end - outputs[port_to] = {ref_from, port_from} - ref_from.outputs[1][port_from] = outputs -- just a place holder - else - _, output_dim = ref_from.layer:get_dim() - input_dim, _ = ref_to.layer:get_dim() - if output_dim[port_from] ~= input_dim[port_to] then - nerv.error("mismatching data dimension between %s and %s", from, to) - end - - table.insert(parsed_conn, - {{ref_from, port_from}, {ref_to, port_to}}) - table.insert(ref_from.next_layers, ref_to) -- add edge - ref_to.in_deg = ref_to.in_deg + 1 -- increase the in-degree of the target layer - end - end - - -- topology sort - local queue = {} - local l = 1 - local r = 1 - for id, ref in pairs(layers) do - if ref.in_deg == 0 then - table.insert(queue, ref) - nerv.info("adding source layer: %s", id) - r = r + 1 - end - end - if l == r then - nerv.error("loop detected") - end - while l < r do - local cur = queue[l] - cur.visited = true - l = l + 1 - for _, nl in pairs(cur.next_layers) do - nl.in_deg = nl.in_deg - 1 - if nl.in_deg == 0 then - table.insert(queue, nl) - r = r + 1 - end - end - end - for i = 1, #queue do - nerv.info("enqueued layer: %s %s", queue[i].layer, queue[i].layer.id) - end - - for id, ref in pairs(layers) do - -- check wether the graph is connected - if ref.visited == false then - nerv.warning("layer %s is ignored", id) - end - end - - nerv.Layer.__init(self, id, global_conf, layer_conf) - self.layers = layers - self.inputs = inputs - self.outputs = outputs - self.parsed_conn = parsed_conn - self.queue = queue -end - -function DAGLayer:bind_params() - -- do nothing (instead of rebinding params for each layer) -end - -function DAGLayer:init(batch_size, chunk_size) - if chunk_size == nil then - chunk_size = 1 - end - for i, conn in ipairs(self.parsed_conn) do - local _, output_dim - local ref_from, port_from, ref_to, port_to - ref_from, port_from = unpack(conn[1]) - ref_to, port_to = unpack(conn[2]) - _, output_dim = ref_from.layer:get_dim() - local dim = 1 - if output_dim[port_from] > 0 then - dim = output_dim[port_from] - end - - for t = 1, chunk_size do - local mid = self.mat_type(batch_size, dim) - local err_mid = mid:create() - touch_list_by_idx(ref_to.inputs, t) - touch_list_by_idx(ref_from.outputs, t) - touch_list_by_idx(ref_from.err_inputs, t) - touch_list_by_idx(ref_to.err_outputs, t) - - ref_from.outputs[t][port_from] = mid - ref_to.inputs[t][port_to] = mid - - ref_from.err_inputs[t][port_from] = err_mid - ref_to.err_outputs[t][port_to] = err_mid - end - end - for id, ref in pairs(self.layers) do - for i = 1, ref.input_len do - if ref.inputs[1][i] == nil then - nerv.error("dangling input port %d of layer %s", i, id) - end - end - for i = 1, ref.output_len do - if ref.outputs[1][i] == nil then - nerv.error("dangling output port %d of layer %s", i, id) - end - end - -- initialize sub layers - ref.layer:init(batch_size, chunk_size) - end - for i = 1, #self.dim_in do - if self.inputs[i] == nil then - nerv.error("dangling port %d of layer <input>", i) - end - end - for i = 1, #self.dim_out do - if 
self.outputs[i] == nil then - nerv.error("dangling port %d of layer <output>", i) - end - end -end - -function DAGLayer:batch_resize(batch_size, chunk_size) - if chunk_size == nil then - chunk_size = 1
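The nerv/examples/asr_trainer.lua hunk near the top shows how training code now drives a nerv.Network wrapper instead of calling the DAGLayer directly. The sketch below restates that loop for a single mini-batch; it assumes `graph` is a GraphLayer and that the input/output/error matrices have already been allocated, as the trainer does above.

-- Sketch of the per-mini-batch driver implied by the asr_trainer.lua hunk
-- above; `graph`, `gconf` and the batch matrices are assumed to exist.
local net = nerv.Network('nt', gconf, {network = graph})
net:init(gconf.batch_size, 1)          -- chunk size 1 for a feed-forward net
net:epoch_init()                       -- once per pass over the data

net:mini_batch_init({seq_length = table.vector(gconf.batch_size, 1),
                     new_seq    = {},
                     do_train   = true,
                     input      = {input},       -- one entry per time step
                     output     = {output},
                     err_input  = {err_input},
                     err_output = {err_output}})
net:propagate()
net:back_propagate()
net:update()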