From 77b558898a2a29097d8697a59a7d23cd2a52975f Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 29 Feb 2016 17:46:09 +0800 Subject: graph layer complete --- nerv/Makefile | 1 + nerv/layer/graph.lua | 118 +++++++++++++++++++++++++++++++++++++++++++++++++ nerv/layer/init.lua | 10 +++++ nerv/layer/rnn.lua | 37 ++++++++++++++++ nerv/main.lua | 31 +++++++++++++ nerv/nn/layer_repo.lua | 14 +++--- 6 files changed, 203 insertions(+), 8 deletions(-) create mode 100644 nerv/layer/graph.lua create mode 100644 nerv/layer/rnn.lua create mode 100644 nerv/main.lua (limited to 'nerv') diff --git a/nerv/Makefile b/nerv/Makefile index a2155b9..ba97579 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,6 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ + layer/graph.lua layer/rnn.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua new file mode 100644 index 0000000..83cf810 --- /dev/null +++ b/nerv/layer/graph.lua @@ -0,0 +1,118 @@ +local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') + +function GraphLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:graph_init(layer_conf.layer_repo, layer_conf.connections) +end + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "" or id == "") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +local function discover(id, layers, layer_repo) + if id == '' then + id = '' + end + local ref = layers[id] + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + ref = { + layer = layer, + inputs = {}, + outputs = {}, + dim_in = dim_in, + dim_out = dim_out, + } + layers[id] = ref + end + return ref +end + +function GraphLayer:graph_init(layer_repo, connections) + self.connections = connections + self.sublayer = nerv.LayerRepo({}, nerv.ParamRepo(), self.gconf) + + -- check data dimension between connected ports + local layers = {} + layers[''] = { + inputs = {}, + outputs = {}, + dim_in = self.dim_out, + dim_out = self.dim_in, + } + for _, edge in pairs(self.connections) do + local from = edge[1] + local to = edge[2] + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = discover(id_from, layers, layer_repo) + local ref_to = discover(id_to, layers, layer_repo) + if ref_to.inputs[port_to] ~= nil then + nerv.error('%s has already been attached', to) + end + if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then + nerv.error('mismatching data dimension between %s and %s', from, to) + end + ref_from.outputs[port_from] = true + ref_to.inputs[port_to] = true + end + + -- check dangling ports + for id, ref in pairs(layers) do + if id ~= '' then + for i = 1, #ref.dim_in do + if ref.inputs[i] == nil then + nerv.error('dangling input port %d of layer %s', i, id) + end + end + for i = 1, #ref.dim_out do + if ref.outputs[i] == nil then + 
nerv.error('dangling output port %d os layer %s', i, id) + end + end + self.sublayer.layers[id] = ref.layer + end + end + for i = 1, #self.dim_in do + if layers[''].outputs[i] == nil then + nerv.error('dangling port %d of layer ', i) + end + end + for i = 1, #self.dim_out do + if layers[''].inputs[i] == nil then + nerv.error('dangling port %d of layer ', i) + end + end +end + +function GraphLayer:set_attr(name, value) + self[name] = value + for id, layer in pairs(self.sublayer.layers) do + layer:set_attr(name, value) + end +end + +function GraphLayer:get_sublayer(id) + return self.sublayer:get_layer(id) +end + +function GraphLayer:get_params() + local param_repos = {} + for id, layer in pairs(self.sublayer.layers) do + table.insert(param_repos, layer:get_params()) + end + return nerv.ParamRepo.merge(param_repos) +end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 54f33ae..5e3395c 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -70,6 +70,14 @@ function Layer:get_dim() return self.dim_in, self.dim_out end +function Layer:set_attr(name, value) + self[name] = value +end + +function Layer:get_sublayer(id) + nerv.error('primitive layer does not have sublayers.') +end + function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) if type(pid_list) == "string" then pid_list = {pid_list} @@ -101,6 +109,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) return p end +nerv.include('graph.lua') nerv.include('affine.lua') nerv.include('sigmoid.lua') nerv.include('tanh.lua') @@ -115,6 +124,7 @@ nerv.include('lstm.lua') nerv.include('lstm_gate.lua') nerv.include('dropout.lua') nerv.include('gru.lua') +nerv.include('rnn.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
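For orientation, a minimal usage sketch of the GraphLayer added above (not part of the patch). The bare "[1]" endpoints in this export appear to be "<input>[1]"/"<output>[1]" placeholders that were stripped as markup; that notation is assumed below, and the layer ids and dimensions are made up:

    -- hypothetical sub-network: graph input -> affine -> sigmoid -> graph output
    local gconf = {cumat_type = nerv.CuMatrixFloat,
                   param_random = function() return 0 end}
    local repo = nerv.LayerRepo(
        {
            ['nerv.AffineLayer'] = {
                affine = {dim_in = {20}, dim_out = {10}},
            },
            ['nerv.SigmoidLayer'] = {
                sigmoid = {dim_in = {10}, dim_out = {10}},
            },
        }, nerv.ParamRepo(), gconf)
    local graph = nerv.GraphLayer('graph', gconf,
        {dim_in = {20}, dim_out = {10}, layer_repo = repo,
         connections = {
             {'<input>[1]', 'affine[1]', 0},   -- graph input port 1 feeds affine port 1
             {'affine[1]', 'sigmoid[1]', 0},
             {'sigmoid[1]', '<output>[1]', 0}, -- sigmoid output becomes graph output 1
         }})
    -- graph_init checks dimensions on every edge and rejects dangling ports,
    -- so every input and output port listed in the repo must be wired.
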
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua new file mode 100644 index 0000000..a93530f --- /dev/null +++ b/nerv/layer/rnn.lua @@ -0,0 +1,37 @@ +local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer') + +function RNNLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = layer_conf.gconf + self:check_dim_len(1, 1) + + local din = layer_conf.dim_in[1] + local dout = layer_conf.dim_out[1] + + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo() + end + + local layers = { + ['nerv.AffineLayer'] = { + main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr}, + }, + ['nerv.SigmoidLayer'] = { + sigmoid = {dim_in = {dout}, dim_out = {dout}}, + }, + } + + local layer_repo = nerv.LayerRepo(layers, pr, global_conf) + + local connections = { + {'[1]', 'main[1]', 0}, + {'main[1]', 'sigmoid[1]', 0}, + {'sigmoid[1]', 'main[2]', 0}, + {'sigmoid[1]', '[1]', 1}, + } + + self:graph_init(layer_repo, connections) +end diff --git a/nerv/main.lua b/nerv/main.lua new file mode 100644 index 0000000..85e291c --- /dev/null +++ b/nerv/main.lua @@ -0,0 +1,31 @@ +print 'Hello' + +local global_conf = { + cumat_type = nerv.CuMatrixFloat, + param_random = function() return 0 end, +} + +local layer_repo = nerv.LayerRepo( + { + ['nerv.RNNLayer'] = { + rnn = {dim_in = {23}, dim_out = {26}}, + }, + ['nerv.AffineLayer'] = { + input = {dim_in = {20}, dim_out = {23}}, + output = {dim_in = {26, 79}, dim_out = {79}}, + }, + ['nerv.SigmoidLayer'] = { + sigmoid = {dim_in = {23}, dim_out = {23}}, + }, + }, nerv.ParamRepo(), global_conf) + +local connections = { + {'[1]', 'input[1]', 0}, + {'input[1]', 'sigmoid[1]', 0}, + {'sigmoid[1]', 'rnn[1]', 0}, + {'rnn[1]', 'output[1]', 0}, + {'output[1]', 'output[2]', 1}, + {'output[1]', '[1]', 0}, +} + +local network = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua index 3d3a79f..a169b2b 100644 --- a/nerv/nn/layer_repo.lua +++ b/nerv/nn/layer_repo.lua @@ -12,20 +12,18 @@ function LayerRepo:add_layers(layer_spec, param_repo, global_conf) if layer_type == nil then nerv.error('layer type `%s` not found', ltype) end - for id, spec in pairs(llist) do + for id, layer_config in pairs(llist) do if layers[id] ~= nil then nerv.error("a layer with id %s already exists", id) end nerv.info("create layer: %s", id) - if type(spec[2]) ~= "table" then + if type(layer_config) ~= "table" then nerv.error("layer config table is need") end - layer_config = spec[2] - if type(spec[1]) ~= "table" then - nerv.error("parameter description table is needed") - end - for pname, pid in pairs(spec[1]) do - layer_config[pname] = param_repo:get_param(pid) + if type(layer_config.params) == "table" then + for pname, pid in pairs(layer_config.params) do + layer_config[pname] = param_repo:get_param(pid) + end end if layer_config.pr == nil then layer_config.pr = param_repo -- cgit v1.2.3 From 1a424bf9233f9b1c67ef135f1a3892b7986c5564 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 29 Feb 2016 22:05:43 +0800 Subject: add network & fix graph_layer --- nerv/Makefile | 2 +- nerv/layer/graph.lua | 46 ++++++++++++++++++++++------------- nerv/layer/rnn.lua | 4 ++-- nerv/main.lua | 4 +++- nerv/nn/init.lua | 1 + nerv/nn/network.lua | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 105 insertions(+), 20 deletions(-) create mode 100644 nerv/nn/network.lua (limited 
to 'nerv') diff --git a/nerv/Makefile b/nerv/Makefile index ba97579..c9c3e42 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -35,7 +35,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ layer/graph.lua layer/rnn.lua \ - nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ + nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 83cf810..36a9672 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -21,20 +21,23 @@ local function parse_id(str) return id, port end -local function discover(id, layers, layer_repo) +function GraphLayer:discover(id, layer_repo) if id == '' then id = '' end + local layers = self.layers local ref = layers[id] if ref == nil then local layer = layer_repo:get_layer(id) local dim_in, dim_out = layer:get_dim() + self.layer_num = self.layer_num + 1 ref = { layer = layer, inputs = {}, outputs = {}, dim_in = dim_in, dim_out = dim_out, + id = self.layer_num, } layers[id] = ref end @@ -42,32 +45,37 @@ local function discover(id, layers, layer_repo) end function GraphLayer:graph_init(layer_repo, connections) - self.connections = connections - self.sublayer = nerv.LayerRepo({}, nerv.ParamRepo(), self.gconf) - - -- check data dimension between connected ports local layers = {} layers[''] = { inputs = {}, outputs = {}, dim_in = self.dim_out, dim_out = self.dim_in, + id = 0, } - for _, edge in pairs(self.connections) do - local from = edge[1] - local to = edge[2] + self.layers = layers + self.layer_num = 0 + self.connections = {} + + -- check data dimension between connected ports + for _, edge in pairs(connections) do + local from, to, time = edge[1], edge[2], edge[3] local id_from, port_from = parse_id(from) local id_to, port_to = parse_id(to) - local ref_from = discover(id_from, layers, layer_repo) - local ref_to = discover(id_to, layers, layer_repo) + local ref_from = self:discover(id_from, layer_repo) + local ref_to = self:discover(id_to, layer_repo) if ref_to.inputs[port_to] ~= nil then nerv.error('%s has already been attached', to) end if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then nerv.error('mismatching data dimension between %s and %s', from, to) end + if ref_from.id == 0 and ref_to.id == 0 then + nerv.error('short-circuit connection between and ') + end ref_from.outputs[port_from] = true ref_to.inputs[port_to] = true + table.insert(self.connections, {ref_from.id, port_from, ref_to.id, port_to, time}) end -- check dangling ports @@ -83,7 +91,6 @@ function GraphLayer:graph_init(layer_repo, connections) nerv.error('dangling output port %d os layer %s', i, id) end end - self.sublayer.layers[id] = ref.layer end end for i = 1, #self.dim_in do @@ -100,19 +107,26 @@ end function GraphLayer:set_attr(name, value) self[name] = value - for id, layer in pairs(self.sublayer.layers) do - layer:set_attr(name, value) + for id, ref in pairs(self.layers) do + if id ~= '' then + ref.layer:set_attr(name, value) + end end end function GraphLayer:get_sublayer(id) - return self.sublayer:get_layer(id) + if self.layers[id] == nil or id == '' then + nerv.error('layer with id %s not found', id) + end + return self.layers[id].layer end function GraphLayer:get_params() local param_repos = {} - for id, layer in pairs(self.sublayer.layers) do - 
table.insert(param_repos, layer:get_params()) + for id, ref in pairs(self.layers) do + if id ~= '' then + table.insert(param_repos, ref.layer:get_params()) + end end return nerv.ParamRepo.merge(param_repos) end diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index a93530f..8816891 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -29,8 +29,8 @@ function RNNLayer:__init(id, global_conf, layer_conf) local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'main[2]', 0}, - {'sigmoid[1]', '[1]', 1}, + {'sigmoid[1]', 'main[2]', 1}, + {'sigmoid[1]', '[1]', 0}, } self:graph_init(layer_repo, connections) diff --git a/nerv/main.lua b/nerv/main.lua index 85e291c..0633e87 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -28,4 +28,6 @@ local connections = { {'output[1]', '[1]', 0}, } -local network = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) + +local network = nerv.Network(graph) diff --git a/nerv/nn/init.lua b/nerv/nn/init.lua index cbaf52b..c32ea09 100644 --- a/nerv/nn/init.lua +++ b/nerv/nn/init.lua @@ -1,3 +1,4 @@ nerv.include('layer_repo.lua') nerv.include('param_repo.lua') nerv.include('layer_dag.lua') +nerv.include('network.lua') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua new file mode 100644 index 0000000..6cee08b --- /dev/null +++ b/nerv/nn/network.lua @@ -0,0 +1,68 @@ +local network = nerv.class('nerv.Network') + +function network:__init(graph) + self.layers = {} + self.socket = self:compile(graph) + for i = 1, #self.layers do + print(self.layers[i].layer.id) + local _, dim_out = self.layers[i].layer:get_dim() + for j = 1, #dim_out do + for k = 1, #self.layers[i].connections[j] do + local connections = self.layers[i].connections[j][k] + print(i, connections[1], connections[2], connections[3]) + end + end + end +end + +function network:compile(layer) + local socket = {inputs = {}, outputs = {}} + if not nerv.is_type(layer, 'nerv.GraphLayer') then + table.insert(self.layers, {layer = layer, connections = {}}) + local id = #self.layers + local dim_in, dim_out = layer:get_dim() + for i = 1, #dim_in do + socket.inputs[i] = {{id, i, 0}} + end + for i = 1, #dim_out do + socket.outputs[i] = {id, i, 0} + self.layers[id].connections[i] = {} + end + else + local sublayer_socket = {} + for id, sublayer in pairs(layer.layers) do + if id ~= '' then + sublayer_socket[sublayer.id] = self:compile(sublayer.layer) + end + end + local dim_in, _ = layer:get_dim() + for i = 1, #dim_in do + socket.inputs[i] = {} + end + for _, edge in pairs(layer.connections) do + -- id = 0 means or + local id_from, port_from = edge[1], edge[2] + local id_to, port_to = edge[3], edge[4] + local time = edge[5] + if id_from == 0 then + for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do + local id, port, t = input[1], input[2], input[3] + time + table.insert(socket.inputs[port_from], {id, port, t}) + end + else + local output = sublayer_socket[id_from].outputs[port_from] + local id, port, t = output[1], output[2], output[3] + time + if id_to == 0 then + socket.outputs[port_to] = {id, port, t} + else + local connections = self.layers[id].connections[port] + for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do + local id1, port1, t1 = input[1], input[2], input[3] + table.insert(connections, {id1, port1, t + t1}) + end + end + end 
+ end + end + return socket +end -- cgit v1.2.3 From 2ea3e139af91eb894d904d7a956e28619b1a70f6 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 1 Mar 2016 20:00:53 +0800 Subject: network init complete --- nerv/Makefile | 2 +- nerv/layer/duplicate.lua | 40 ++++++ nerv/layer/graph.lua | 3 + nerv/layer/init.lua | 1 + nerv/layer/rnn.lua | 8 +- nerv/main.lua | 18 ++- nerv/nn/network.lua | 324 +++++++++++++++++++++++++++++++++++++++++++---- 7 files changed, 367 insertions(+), 29 deletions(-) create mode 100644 nerv/layer/duplicate.lua (limited to 'nerv') diff --git a/nerv/Makefile b/nerv/Makefile index c9c3e42..0a2aa86 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua \ + layer/graph.lua layer/rnn.lua layer/duplicate.lua\ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua new file mode 100644 index 0000000..58758e8 --- /dev/null +++ b/nerv/layer/duplicate.lua @@ -0,0 +1,40 @@ +local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer') + +function DuplicateLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(1, -1) + if #self.dim_out < 1 then + nerv.error('no output specified') + end + for i = 1, #self.dim_out do + if self.dim_out[i] ~= self.dim_in[1] then + nerv.error('mismatching dimensions of outputs') + end + end +end + +function DuplicateLayer:init(batch_size) +end + +function DuplicateLayer:batch_resize(batch_size) +end + +function DuplicateLayer:update(bp_err, input, output) +end + +function DuplicateLayer:propagate(input, output) + for i = 1, #self.dim_out do + output[i]:copy_from(input[1]) + -- FIXME: use reference copy to speed up + end +end + +function DuplicateLayer:back_propagate(bp_err, next_bp_err, input, output) + next_bp_err:copy_from(bp_err[1]) + for i = 2, #self.dim_out do + next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) + end +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 36a9672..d72d849 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -64,6 +64,9 @@ function GraphLayer:graph_init(layer_repo, connections) local id_to, port_to = parse_id(to) local ref_from = self:discover(id_from, layer_repo) local ref_to = self:discover(id_to, layer_repo) + if ref_from.outputs[port_from] ~= nil then + nerv.error('%s has already been attached', from) + end if ref_to.inputs[port_to] ~= nil then nerv.error('%s has already been attached', to) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 5e3395c..6f26d4d 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -125,6 +125,7 @@ nerv.include('lstm_gate.lua') nerv.include('dropout.lua') nerv.include('gru.lua') nerv.include('rnn.lua') +nerv.include('duplicate.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
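Two conventions introduced by this commit are easy to miss in the diff and are illustrated by the rnn.lua hunk that follows. First, the third field of a connection tuple is a time shift measured in chunks: 0 is the same time step, 1 delivers the value one step later, and negative values look back (as in main.lua's {'dup[1]', 'output[2]', -1}). Second, graph_init now rejects attaching one output port to more than one edge, so any fan-out, including the recurrent loop, has to go through the new DuplicateLayer. A sketch with the '<input>'/'<output>' pseudo-ids assumed as before and hypothetical layer ids:

    -- recurrent block: 'main' is an affine layer whose second input port
    -- receives the previous step's hidden state
    local connections = {
        {'<input>[1]', 'main[1]', 0},
        {'main[1]', 'sigmoid[1]', 0},
        {'sigmoid[1]', 'dup[1]', 0},     -- fan out through DuplicateLayer
        {'dup[1]', 'main[2]', 1},        -- recurrent edge, arrives at t + 1
        {'dup[2]', '<output>[1]', 0},
    }
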
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index 8816891..806ac58 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -22,6 +22,9 @@ function RNNLayer:__init(id, global_conf, layer_conf) ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {dout}, dim_out = {dout}}, }, + ['nerv.DuplicateLayer'] = { + dup = {dim_in = {dout}, dim_out = {dout, dout}}, + } } local layer_repo = nerv.LayerRepo(layers, pr, global_conf) @@ -29,8 +32,9 @@ function RNNLayer:__init(id, global_conf, layer_conf) local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'main[2]', 1}, - {'sigmoid[1]', '[1]', 0}, + {'sigmoid[1]', 'dup[1]', 0}, + {'dup[1]', 'main[2]', 1}, + {'dup[2]', '[1]', 0}, } self:graph_init(layer_repo, connections) diff --git a/nerv/main.lua b/nerv/main.lua index 0633e87..5cb7d07 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -17,6 +17,12 @@ local layer_repo = nerv.LayerRepo( ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {23}, dim_out = {23}}, }, + ['nerv.SoftmaxLayer'] = { + softmax = {dim_in = {79}, dim_out = {79}}, + }, + ['nerv.DuplicateLayer'] = { + dup = {dim_in = {79}, dim_out = {79, 79}}, + }, }, nerv.ParamRepo(), global_conf) local connections = { @@ -24,10 +30,14 @@ local connections = { {'input[1]', 'sigmoid[1]', 0}, {'sigmoid[1]', 'rnn[1]', 0}, {'rnn[1]', 'output[1]', 0}, - {'output[1]', 'output[2]', 1}, - {'output[1]', '[1]', 0}, + {'output[1]', 'dup[1]', 0}, + {'dup[1]', 'output[2]', -1}, + {'dup[2]', 'softmax[1]', 0}, + {'softmax[1]', '[1]', 0}, } -local graph = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) + +local network = nerv.Network('network', global_conf, {network = graph}) -local network = nerv.Network(graph) +network:init(2,5) diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 6cee08b..01290e7 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -1,15 +1,47 @@ local network = nerv.class('nerv.Network') -function network:__init(graph) +function network:__init(id, global_conf, network_conf) + self.id = id + self.dim_in = network_conf.network.dim_in + self.dim_out = network_conf.network.dim_out + self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end + self.clip = network_conf.clip + self.nn_act_default = network_conf.nn_act_default + if self.nn_act_default == nil then + self.nn_act_default = 0 + end self.layers = {} - self.socket = self:compile(graph) + self.input_conn = {} + self.output_conn = {} + self.socket = self:compile(network_conf.network) + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if self.input_conn[id][port] ~= nil then + nerv.error('duplicate edge') + end + self.input_conn[id][port] = {0, i, time} + end + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if self.output_conn[id][port] ~= nil then + nerv.error('duplicate edge') + end + self.output_conn[id][port] = {0, i, time} + end + self.delay = 0 for i = 1, #self.layers do - print(self.layers[i].layer.id) - local _, dim_out = self.layers[i].layer:get_dim() - for j = 1, #dim_out do - for k = 1, #self.layers[i].connections[j] do - local connections = self.layers[i].connections[j][k] - print(i, 
connections[1], connections[2], connections[3]) + local dim_in, _ = self.layers[i]:get_dim() + for j = 1, #dim_in do + local time = self.input_conn[i][j][3] + if math.abs(time) > self.delay then + self.delay = math.abs(time) end end end @@ -18,15 +50,16 @@ end function network:compile(layer) local socket = {inputs = {}, outputs = {}} if not nerv.is_type(layer, 'nerv.GraphLayer') then - table.insert(self.layers, {layer = layer, connections = {}}) + table.insert(self.layers, layer) local id = #self.layers + self.input_conn[id] = {} + self.output_conn[id] = {} local dim_in, dim_out = layer:get_dim() for i = 1, #dim_in do - socket.inputs[i] = {{id, i, 0}} + socket.inputs[i] = {id, i, 0} end for i = 1, #dim_out do socket.outputs[i] = {id, i, 0} - self.layers[id].connections[i] = {} end else local sublayer_socket = {} @@ -35,34 +68,281 @@ function network:compile(layer) sublayer_socket[sublayer.id] = self:compile(sublayer.layer) end end - local dim_in, _ = layer:get_dim() - for i = 1, #dim_in do - socket.inputs[i] = {} - end for _, edge in pairs(layer.connections) do -- id = 0 means or local id_from, port_from = edge[1], edge[2] local id_to, port_to = edge[3], edge[4] local time = edge[5] if id_from == 0 then - for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do - local id, port, t = input[1], input[2], input[3] + time - table.insert(socket.inputs[port_from], {id, port, t}) + if socket.inputs[port_from] ~= nil then + nerv.error('duplicate input socket') end + local input = sublayer_socket[id_to].inputs[port_to] + local id, port, t = input[1], input[2], input[3] + time + socket.inputs[port_from] = {id, port, t} else local output = sublayer_socket[id_from].outputs[port_from] local id, port, t = output[1], output[2], output[3] + time if id_to == 0 then + if socket.outputs[port_to] ~= nil then + nerv.error('duplicate output socket') + end socket.outputs[port_to] = {id, port, t} else - local connections = self.layers[id].connections[port] - for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do - local id1, port1, t1 = input[1], input[2], input[3] - table.insert(connections, {id1, port1, t + t1}) + local input = sublayer_socket[id_to].inputs[port_to] + local id1, port1, t1 = input[1], input[2], input[3] + if self.input_conn[id1][port1] ~= nil or self.output_conn[id][port] ~= nil then + nerv.error('duplicate edge') end + self.input_conn[id1][port1] = {id, port, t + t1} + self.output_conn[id][port] = {id1, port1, t + t1} end end end end return socket end + +function network:init(batch_size, chunk_size) + self.batch_size = batch_size + self.chunk_size = chunk_size + + self:topsort() + + self:make_initial_store() + collectgarbage('collect') +end + +function network:topsort() + nerv.info('Network topology sort') + local degree = {} + for t = 1, self.chunk_size do + degree[t] = {} + for i = 1, #self.layers do + degree[t][i] = 0 + end + end + + for t = 1, self.chunk_size do + for i = 1, #self.layers do + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + if self.output_conn[i][j] ~= nil then + local edge = self.output_conn[i][j] + local id, _, time = edge[1], edge[2], edge[3] + t + if time >= 1 and time <= self.chunk_size and id ~= 0 then + degree[time][id] = degree[time][id] + 1 + end + end + end + end + end + + self.queue = {} + local l = 1 + local r = 0 + for t = 1, self.chunk_size do + for i = 1, #self.layers do + if degree[t][i] == 0 then + r = r + 1 + self.queue[r] = {chunk = t, id = i} + end + end + end + while l<=r do + local t, i = self.queue[l].chunk, 
self.queue[l].id + l = l + 1 + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + if self.output_conn[i][j] ~= nil then + local edge = self.output_conn[i][j] + local id, _, time = edge[1], edge[2], edge[3] + t + if time >= 1 and time <= self.chunk_size and id ~= 0 then + degree[time][id] = degree[time][id] - 1 + if degree[time][id] == 0 then + r = r + 1 + self.queue[r] = {chunk = time, id = id} + end + end + end + end + end + + if r ~= self.chunk_size * #self.layers then + nerv.error('loop detected') + end +end + +function network:make_initial_store() + nerv.info('Network initing storage') + + -- allocate memory + local memory = {} + local err_memory = {} + for t = 1 - self.delay, self.chunk_size + self.delay do + memory[t] = {} + err_memory[t] = {} + for i = 1, #self.layers do + memory[t][i] = {} + err_memory[t][i] = {} + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + err_memory[t][i][j] = self.mat_type(self.batch_size, dim_in[j]) + err_memory[t][i][j]:fill(0) + end + for j = 1, #dim_out do + memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j]) + memory[t][i][j]:fill(self.nn_act_default) + end + end + -- memory[t][0] stores network input + memory[t][0] = {} + for j = 1, #self.dim_in do + memory[t][0][j] = self.mat_type(self.batch_size, self.dim_in[j]) + memory[t][0][j]:fill(self.nn_act_default) + end + -- err_memory[t][0] stores network err_input + err_memory[t][0] = {} + for j = 1, #self.dim_out do + err_memory[t][0][j] = self.mat_type(self.batch_size, self.dim_out[j]) + err_memory[t][0][j]:fill(0) + end + end + + -- connect memory and reference + self.input = {} + self.output = {} + self.err_input = {} + self.err_output = {} + for t = 1, self.chunk_size do + self.input[t] = {} + self.output[t] = {} + self.err_input[t] = {} + self.err_output[t] = {} + for i = 1, #self.layers do + self.input[t][i] = {} + self.output[t][i] = {} + self.err_input[t][i] = {} + self.err_output[t][i] = {} + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + local edge = self.input_conn[i][j] + local id, port, time = edge[1], edge[2], edge[3] + if id ~= 0 or t - time < 1 or t - time > self.chunk_size then + self.input[t][i][j] = memory[t - time][id][port] + end + if id ~= 0 then + self.err_output[t][i][j] = err_memory[t][i][j] + end + end + for j = 1, #dim_out do + local edge = self.output_conn[i][j] + local id, port, time = edge[1], edge[2], edge[3] + if id ~= 0 then + self.output[t][i][j] = memory[t][i][j] + end + if id ~= 0 or t + time < 1 or t + time > self.chunk_size then + self.err_input[t][i][j] = err_memory[t + time][id][port] + end + end + end + end + + -- check dangling reference + for t = 1, self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + if self.input[t + time][id][port] ~= nil then + nerv.error('input reference not nil') + end + self.input[t + time][id][port] = true -- just a place holder + if self.err_output[t + time][id][port] ~= nil then + nerv.error('err_output reference not nil') + end + self.err_output[t + time][id][port] = true -- just a place holder + end + end + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + if self.output[t - time][id][port] ~= nil then + nerv.error('output reference not nil') + end + self.output[t - time][id][port] = true -- 
just a place holder + if self.err_input[t - time][id][port] ~= nil then + nerv.error('err_output reference not nil') + end + self.err_input[t - time][id][port] = true -- just a place holder + end + end + end + for t = 1, self.chunk_size do + for i = 1, #self.layers do + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + if self.input[t][i][j] == nil then + nerv.error('input reference dangling') + end + if self.err_output[t][i][j] == nil then + nerv.error('err_output reference dangling') + end + end + for j = 1, #dim_out do + if self.output[t][i][j] == nil then + nerv.error('output reference dangling') + end + if self.err_input[t][i][j] == nil then + nerv.error('err_input reference dangling') + end + end + end + end + + -- allocate reference for legacy of previous mini-batch + self.legacy = {} + for t = 1 - self.delay, 0 do + self.legacy[t] = {} + for i = 1, #self.layers do + self.legacy[t][i] = {} + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + self.legacy[t][i][j] = memory[t][i][j] + end + end + end +end + +function network:mini_batch_init(information) + self.info = information + self.max_chunk = 0 + for i = 1, self.batch_size do + if self.info.seq_length[i] > self.max_chunk then + self.max_chunk = self.info.seq_length[i] + end + end + for t = 1 - self.delay, 0 do + for i = 1, #self.layers do + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + self.output[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + end + end + end + for t = self.max_chunk + 1, self.max_chunk + self.delay do + if t > self.chunk_size then + break + end + for i = 1, #self.layers do + local dim_in, _ = self.layers[i]:get_dim() + for j = 1, #dim_in do + self.err_output[t][i][j]:fill(0) + end + end + end +end + +function network:propagate(input, output) +end -- cgit v1.2.3 From 31e575379fa46eb8f76f00ba62e11626ed67ca72 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 13:07:20 +0800 Subject: network complete --- nerv/nn/network.lua | 124 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 5 deletions(-) (limited to 'nerv') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 01290e7..e1a9629 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -111,6 +111,10 @@ function network:init(batch_size, chunk_size) self:make_initial_store() collectgarbage('collect') + + for i = 1, #self.layers do + self.layers[i]:init(batch_size, chunk_size) + end end function network:topsort() @@ -315,23 +319,86 @@ function network:make_initial_store() end end +function network:set_input(input) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + self.input[t + time][id][port] = input[t][i] + end + end + end +end + +function network:set_output(output) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + self.output[t - time][id][port] = output[t][i] + end + end + end +end + +function network:set_err_input(err_input) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + self.err_input[t - time][id][port] = err_input[t][i] + end + end + end +end + 
+function network:set_err_output(err_output) + for t = 1, self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + self.err_output[t + time][id][port] = err_output[t][i] + end + end + end +end + function network:mini_batch_init(information) self.info = information - self.max_chunk = 0 + self.max_length = 0 + self.border = {} + for i = 1, self.chunk_size do + self.border[i] = {} + end for i = 1, self.batch_size do - if self.info.seq_length[i] > self.max_chunk then - self.max_chunk = self.info.seq_length[i] + if self.info.seq_length[i] > self.max_length then + self.max_length = self.info.seq_length[i] + end + for t = 1, self.delay do + local chunk = self.info.seq_length[i] + t + if chunk > self.chunk_size then + break + end + table.insert(self.border[chunk], i) end end for t = 1 - self.delay, 0 do for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() for j = 1, #dim_out do - self.output[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + for k = 1, #self.info.new_seq do + local batch = self.info.new_seq[k] + self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default) + end end end end - for t = self.max_chunk + 1, self.max_chunk + self.delay do + for t = self.max_length + 1, self.max_length + self.delay do if t > self.chunk_size then break end @@ -345,4 +412,51 @@ function network:mini_batch_init(information) end function network:propagate(input, output) + network:set_input(input) + network:set_output(output) + for i = 1, #self.queue do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + self.layers[id]:propagate(self.input[t][id], self.output[t][id], t) + end + for j = 1, #self.border[t] do + local batch = self.border[t][j] + local _, dim_out = self.layers[id]:get_dim() + for k = 1, #dim_out do + self.output[t][id][k][batch - 1]:fill(self.nn_act_default) + end + end + end +end + +function network:back_propagate(bp_err, next_bp_err, input, output) + network:set_input(input) + network:set_output(output) + network:set_err_input(bp_err) + network:set_err_output(next_bp_err) + for i = #self.queue, 1, -1 do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + for j = 1, #self.border[t] do + local batch = self.border[t][j] + local dim_in, _ = self.layers[id]:get_dim() + for k = 1, #dim_in do + self.err_input[t][id][k][batch - 1]:fill(0) + end + end + self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t) + end + end +end + +function network:update(bp_err, input, output) + network:set_input(input) + network:set_output(output) + network:set_err_input(bp_err) + for i = 1, #self.queue do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + self.layers[id]:update(self.err_input[t][id], self.input[t][id], self.output[t][id], t) + end + end end -- cgit v1.2.3 From a87f8954c97cf633a0100c9108764bca8c43a083 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 15:38:55 +0800 Subject: add identity layer --- nerv/layer/duplicate.lua | 12 ++++++------ nerv/layer/identity.lua | 33 +++++++++++++++++++++++++++++++++ nerv/nn/network.lua | 18 ++++++++++++++---- 3 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 nerv/layer/identity.lua (limited to 'nerv') diff --git a/nerv/layer/duplicate.lua 
b/nerv/layer/duplicate.lua index 58758e8..fbd4a9e 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -16,13 +16,10 @@ function DuplicateLayer:__init(id, global_conf, layer_conf) end end -function DuplicateLayer:init(batch_size) +function DuplicateLayer:init() end -function DuplicateLayer:batch_resize(batch_size) -end - -function DuplicateLayer:update(bp_err, input, output) +function DuplicateLayer:batch_resize() end function DuplicateLayer:propagate(input, output) @@ -32,9 +29,12 @@ function DuplicateLayer:propagate(input, output) end end -function DuplicateLayer:back_propagate(bp_err, next_bp_err, input, output) +function DuplicateLayer:back_propagate(bp_err, next_bp_err) next_bp_err:copy_from(bp_err[1]) for i = 2, #self.dim_out do next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) end end + +function DuplicateLayer:update() +end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua new file mode 100644 index 0000000..dc796fb --- /dev/null +++ b/nerv/layer/identity.lua @@ -0,0 +1,33 @@ +local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer') + +function IdentityLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(1, 1) + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error('mismatching dimensions of input and output') + end +end + +function IdentityLayer:init() +end + +function IdentityLayer:batch_resize() +end + +function IdentityLayer:propagate(input, output) + output[1]:copy_from(input[1]) +end + +function IdentityLayer:back_propagate(bp_err, next_bp_err) + next_bp_err[1]:copy_from(bp_err) +end + +function IdentityLayer:update() +end + +function IdentityLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index e1a9629..3cf052b 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -118,7 +118,7 @@ function network:init(batch_size, chunk_size) end function network:topsort() - nerv.info('Network topology sort') + nerv.info('network topology sort') local degree = {} for t = 1, self.chunk_size do degree[t] = {} @@ -133,7 +133,7 @@ function network:topsort() for j = 1, #dim_out do if self.output_conn[i][j] ~= nil then local edge = self.output_conn[i][j] - local id, _, time = edge[1], edge[2], edge[3] + t + local id, time = edge[1], edge[3] + t if time >= 1 and time <= self.chunk_size and id ~= 0 then degree[time][id] = degree[time][id] + 1 end @@ -160,7 +160,7 @@ function network:topsort() for j = 1, #dim_out do if self.output_conn[i][j] ~= nil then local edge = self.output_conn[i][j] - local id, _, time = edge[1], edge[2], edge[3] + t + local id, time = edge[1], edge[3] + t if time >= 1 and time <= self.chunk_size and id ~= 0 then degree[time][id] = degree[time][id] - 1 if degree[time][id] == 0 then @@ -178,7 +178,7 @@ function network:topsort() end function network:make_initial_store() - nerv.info('Network initing storage') + nerv.info('network initing storage') -- allocate memory local memory = {} @@ -386,6 +386,7 @@ function network:mini_batch_init(information) table.insert(self.border[chunk], i) end end + -- copy legacy for t = 1 - self.delay, 0 do for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() @@ -398,6 +399,7 @@ function network:mini_batch_init(information) end end end + -- flush border gradient for t = self.max_length + 1, self.max_length + self.delay do if t > self.chunk_size then break @@ -419,6 +421,7 @@ function 
network:propagate(input, output) if t <= self.max_length then self.layers[id]:propagate(self.input[t][id], self.output[t][id], t) end + -- flush border activation for j = 1, #self.border[t] do local batch = self.border[t][j] local _, dim_out = self.layers[id]:get_dim() @@ -437,6 +440,7 @@ function network:back_propagate(bp_err, next_bp_err, input, output) for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then + -- flush border gradient for j = 1, #self.border[t] do local batch = self.border[t][j] local dim_in, _ = self.layers[id]:get_dim() @@ -445,6 +449,12 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end end self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t) + if self.clip ~= nil then + local dim_in, _ = self.layers[id]:get_dim() + for j = 1, #dim_in do + self.err_output[t][id][j]:clip(-self.clip, self.clip) + end + end end end end -- cgit v1.2.3 From c682dfee8686c43aed8628633412c9b4d2bd708b Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 16:43:47 +0800 Subject: fix bug --- nerv/Makefile | 2 +- nerv/layer/duplicate.lua | 4 ++-- nerv/layer/identity.lua | 2 +- nerv/layer/init.lua | 1 + nerv/main.lua | 36 ++++++++++++++++++++++++++++++------ nerv/nn/network.lua | 32 +++++++++++++++++--------------- 6 files changed, 52 insertions(+), 25 deletions(-) (limited to 'nerv') diff --git a/nerv/Makefile b/nerv/Makefile index 0a2aa86..a9b4baf 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua layer/duplicate.lua\ + layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index fbd4a9e..1a93b26 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -30,9 +30,9 @@ function DuplicateLayer:propagate(input, output) end function DuplicateLayer:back_propagate(bp_err, next_bp_err) - next_bp_err:copy_from(bp_err[1]) + next_bp_err[1]:copy_from(bp_err[1]) for i = 2, #self.dim_out do - next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) + next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0) end end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua index dc796fb..aeeff89 100644 --- a/nerv/layer/identity.lua +++ b/nerv/layer/identity.lua @@ -22,7 +22,7 @@ function IdentityLayer:propagate(input, output) end function IdentityLayer:back_propagate(bp_err, next_bp_err) - next_bp_err[1]:copy_from(bp_err) + next_bp_err[1]:copy_from(bp_err[1]) end function IdentityLayer:update() diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 6f26d4d..39f97b1 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -126,6 +126,7 @@ nerv.include('dropout.lua') nerv.include('gru.lua') nerv.include('rnn.lua') nerv.include('duplicate.lua') +nerv.include('identity.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
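Before the main.lua changes below, a small worked example of the bookkeeping that network:mini_batch_init expects (the field names come from the code above; the numbers are illustrative and match the toy loop in main.lua):

    -- assume batch_size = 2, chunk_size = 5 and a network delay of 1
    -- slot 1 contributes 5 frames to this chunk, slot 2 contributes 3 and has
    -- just started a new sequence in this mini-batch
    network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}})
    -- effects: the carried-over (legacy) activations of slot 2 are reset to
    -- nn_act_default; for each slot i, the border frames
    -- t = seq_length[i] + 1 .. seq_length[i] + delay have their activations
    -- flushed during propagate and their gradients zeroed during back_propagate
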
diff --git a/nerv/main.lua b/nerv/main.lua index 5cb7d07..865aba0 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -1,8 +1,10 @@ -print 'Hello' - local global_conf = { cumat_type = nerv.CuMatrixFloat, param_random = function() return 0 end, + lrate = 0.1, + wcost = 0, + momentum = 0.9, + batch_size = 2, } local layer_repo = nerv.LayerRepo( @@ -11,13 +13,13 @@ local layer_repo = nerv.LayerRepo( rnn = {dim_in = {23}, dim_out = {26}}, }, ['nerv.AffineLayer'] = { - input = {dim_in = {20}, dim_out = {23}}, + input = {dim_in = {62}, dim_out = {23}}, output = {dim_in = {26, 79}, dim_out = {79}}, }, ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {23}, dim_out = {23}}, }, - ['nerv.SoftmaxLayer'] = { + ['nerv.IdentityLayer'] = { softmax = {dim_in = {79}, dim_out = {79}}, }, ['nerv.DuplicateLayer'] = { @@ -36,8 +38,30 @@ local connections = { {'softmax[1]', '[1]', 0}, } -local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {62}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) local network = nerv.Network('network', global_conf, {network = graph}) -network:init(2,5) +local batch = global_conf.batch_size +local chunk = 5 +network:init(batch, chunk) + +local input = {} +local output = {} +local err_input = {} +local err_output = {} +local input_size = 62 +local output_size = 79 +for i = 1, chunk do + input[i] = {global_conf.cumat_type(batch, input_size)} + output[i] = {global_conf.cumat_type(batch, output_size)} + err_input[i] = {global_conf.cumat_type(batch, output_size)} + err_output[i] = {global_conf.cumat_type(batch, input_size)} +end + +for i = 1, 100 do + network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}}) + network:propagate(input, output) + network:back_propagate(err_input, err_output, input, output) + network:update(err_input, input, output) +end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 3cf052b..0bbcc59 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -320,7 +320,7 @@ function network:make_initial_store() end function network:set_input(input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_in do local edge = self.socket.inputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -332,7 +332,7 @@ function network:set_input(input) end function network:set_output(output) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -344,7 +344,7 @@ function network:set_output(output) end function network:set_err_input(err_input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -391,7 +391,9 @@ function network:mini_batch_init(information) for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() for j = 1, #dim_out do - self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + if t + self.chunk_size >= 1 and self.output_conn[i][j][1] ~= 0 then + self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + end for k = 1, #self.info.new_seq do local batch = self.info.new_seq[k] self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default) @@ -414,8 +416,8 @@ function network:mini_batch_init(information) end function network:propagate(input, output) - 
network:set_input(input) - network:set_output(output) + self:set_input(input) + self:set_output(output) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then @@ -433,18 +435,18 @@ function network:propagate(input, output) end function network:back_propagate(bp_err, next_bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) - network:set_err_output(next_bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) + self:set_err_output(next_bp_err) for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- flush border gradient for j = 1, #self.border[t] do local batch = self.border[t][j] - local dim_in, _ = self.layers[id]:get_dim() - for k = 1, #dim_in do + local _, dim_out = self.layers[id]:get_dim() + for k = 1, #dim_out do self.err_input[t][id][k][batch - 1]:fill(0) end end @@ -460,9 +462,9 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end function network:update(bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- cgit v1.2.3 From 8374e8fbc545633b6adf5c4090af8997a65778d2 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 3 Mar 2016 19:42:15 +0800 Subject: update add_prefix for graph layer --- nerv/layer/duplicate.lua | 4 ++++ nerv/layer/graph.lua | 24 ++++++++++++++++++++++++ nerv/layer/init.lua | 2 +- nerv/layer/rnn.lua | 4 ++-- nerv/main.lua | 12 +++++++++--- nerv/nn/network.lua | 15 ++++++++++++++- 6 files changed, 54 insertions(+), 7 deletions(-) (limited to 'nerv') diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index 1a93b26..8988617 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -38,3 +38,7 @@ end function DuplicateLayer:update() end + +function DuplicateLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index d72d849..1406eff 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -21,6 +21,30 @@ local function parse_id(str) return id, port end +function GraphLayer:add_prefix(layers, connections) + local function ap(name) + return self.id .. '.' .. 
name + end + + for layer_type, sublayers in pairs(layers) do + local tmp = {} + for name, layer_config in pairs(sublayers) do + tmp[ap(name)] = layer_config + end + layers[layer_type] = tmp + end + + for i = 1, #connections do + local from, to = connections[i][1], connections[i][2] + if parse_id(from) ~= '' then + connections[i][1] = ap(from) + end + if parse_id(to) ~= '' then + connections[i][2] = ap(to) + end + end +end + function GraphLayer:discover(id, layer_repo) if id == '' then id = '' diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 39f97b1..4fabefa 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -75,7 +75,7 @@ function Layer:set_attr(name, value) end function Layer:get_sublayer(id) - nerv.error('primitive layer does not have sublayers.') + nerv.error('primitive layer does not have sublayers') end function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index 806ac58..38f2326 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -27,8 +27,6 @@ function RNNLayer:__init(id, global_conf, layer_conf) } } - local layer_repo = nerv.LayerRepo(layers, pr, global_conf) - local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, @@ -37,5 +35,7 @@ function RNNLayer:__init(id, global_conf, layer_conf) {'dup[2]', '[1]', 0}, } + self:add_prefix(layers, connections) + local layer_repo = nerv.LayerRepo(layers, pr, global_conf) self:graph_init(layer_repo, connections) end diff --git a/nerv/main.lua b/nerv/main.lua index 865aba0..7c82ebf 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -10,7 +10,8 @@ local global_conf = { local layer_repo = nerv.LayerRepo( { ['nerv.RNNLayer'] = { - rnn = {dim_in = {23}, dim_out = {26}}, + rnn1 = {dim_in = {23}, dim_out = {26}}, + rnn2 = {dim_in = {26}, dim_out = {26}}, }, ['nerv.AffineLayer'] = { input = {dim_in = {62}, dim_out = {23}}, @@ -30,8 +31,9 @@ local layer_repo = nerv.LayerRepo( local connections = { {'[1]', 'input[1]', 0}, {'input[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'rnn[1]', 0}, - {'rnn[1]', 'output[1]', 0}, + {'sigmoid[1]', 'rnn1[1]', 0}, + {'rnn1[1]', 'rnn2[1]', 0}, + {'rnn2[1]', 'output[1]', 0}, {'output[1]', 'dup[1]', 0}, {'dup[1]', 'output[2]', -1}, {'dup[2]', 'softmax[1]', 0}, @@ -65,3 +67,7 @@ for i = 1, 100 do network:back_propagate(err_input, err_output, input, output) network:update(err_input, input, output) end + +local tmp = network:get_params() + +tmp:export('../../workspace/test.param') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 0bbcc59..39df5f0 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -18,7 +18,8 @@ function network:__init(id, global_conf, network_conf) self.layers = {} self.input_conn = {} self.output_conn = {} - self.socket = self:compile(network_conf.network) + self.network = network_conf.network + self.socket = self:compile(self.network) for i = 1, #self.dim_in do local edge = self.socket.inputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -472,3 +473,15 @@ function network:update(bp_err, input, output) end end end + +function network:set_attr(name, value) + self.network:set_attr(name, value) +end + +function network:get_sublayer(id) + return self.network:get_sublayer(id) +end + +function network:get_params() + return self.network:get_params() +end -- cgit v1.2.3 From 4e56b863203ab6919192efe973ba9f8ee0d5ac65 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 4 Mar 2016 19:47:33 +0800 Subject: update lstm --- nerv/layer/lstm.lua | 48 
++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'nerv') diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua index 500bd87..caa7569 100644 --- a/nerv/layer/lstm.lua +++ b/nerv/layer/lstm.lua @@ -22,47 +22,47 @@ function LSTMLayer:__init(id, global_conf, layer_conf) local dout1, dout2, dout3 = self.dim_out[1], self.dim_out[2], self.dim_out[3] local layers = { ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {dim_in = {din1}, + [ap("inputXDup")] = {dim_in = {din1}, dim_out = {din1, din1, din1, din1}, - lambda = {1}}}, + lambda = {1}}, - [ap("inputHDup")] = {{}, {dim_in = {din2}, + [ap("inputHDup")] = {dim_in = {din2}, dim_out = {din2, din2, din2, din2}, -
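
The lstm.lua hunk above is cut off in this export, but the visible part of the change is mechanical: the layer specs inside LSTMLayer are rewritten from the old two-element form {param_table, config_table} to the single config table that the first commit's layer_repo.lua change expects, with parameters moved into an optional params field. A sketch of the two styles (parameter ids are made up; ltp/bp are assumed to be the affine parameter names):

    -- old spec, no longer accepted by LayerRepo:add_layers:
    --   main = {{ltp = 'main_ltp', bp = 'main_bp'}, {dim_in = {429}, dim_out = {2048}}},
    -- new spec:
    local layers = {
        ['nerv.AffineLayer'] = {
            main = {dim_in = {429}, dim_out = {2048},
                    params = {ltp = 'main_ltp', bp = 'main_bp'}},
        },
    }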