From 77b558898a2a29097d8697a59a7d23cd2a52975f Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 29 Feb 2016 17:46:09 +0800 Subject: graph layer complete --- nerv/Makefile | 1 + nerv/layer/graph.lua | 118 +++++++++++++++++++++++++++++++++++++++++++++++++ nerv/layer/init.lua | 10 +++++ nerv/layer/rnn.lua | 37 ++++++++++++++++ nerv/main.lua | 31 +++++++++++++ nerv/nn/layer_repo.lua | 14 +++--- 6 files changed, 203 insertions(+), 8 deletions(-) create mode 100644 nerv/layer/graph.lua create mode 100644 nerv/layer/rnn.lua create mode 100644 nerv/main.lua diff --git a/nerv/Makefile b/nerv/Makefile index a2155b9..ba97579 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,6 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ + layer/graph.lua layer/rnn.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua new file mode 100644 index 0000000..83cf810 --- /dev/null +++ b/nerv/layer/graph.lua @@ -0,0 +1,118 @@ +local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') + +function GraphLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:graph_init(layer_conf.layer_repo, layer_conf.connections) +end + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "" or id == "") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +local function discover(id, layers, layer_repo) + if id == '' then + id = '' + end + local ref = layers[id] + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + ref = { + layer = layer, + inputs = {}, + outputs = {}, + dim_in = dim_in, + dim_out = dim_out, + } + layers[id] = ref + end + return ref +end + +function GraphLayer:graph_init(layer_repo, connections) + self.connections = connections + self.sublayer = nerv.LayerRepo({}, nerv.ParamRepo(), self.gconf) + + -- check data dimension between connected ports + local layers = {} + layers[''] = { + inputs = {}, + outputs = {}, + dim_in = self.dim_out, + dim_out = self.dim_in, + } + for _, edge in pairs(self.connections) do + local from = edge[1] + local to = edge[2] + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = discover(id_from, layers, layer_repo) + local ref_to = discover(id_to, layers, layer_repo) + if ref_to.inputs[port_to] ~= nil then + nerv.error('%s has already been attached', to) + end + if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then + nerv.error('mismatching data dimension between %s and %s', from, to) + end + ref_from.outputs[port_from] = true + ref_to.inputs[port_to] = true + end + + -- check dangling ports + for id, ref in pairs(layers) do + if id ~= '' then + for i = 1, #ref.dim_in do + if ref.inputs[i] == nil then + nerv.error('dangling input port %d of layer %s', i, id) + end + end + for i = 1, #ref.dim_out do + if ref.outputs[i] == nil then + nerv.error('dangling 
output port %d os layer %s', i, id) + end + end + self.sublayer.layers[id] = ref.layer + end + end + for i = 1, #self.dim_in do + if layers[''].outputs[i] == nil then + nerv.error('dangling port %d of layer ', i) + end + end + for i = 1, #self.dim_out do + if layers[''].inputs[i] == nil then + nerv.error('dangling port %d of layer ', i) + end + end +end + +function GraphLayer:set_attr(name, value) + self[name] = value + for id, layer in pairs(self.sublayer.layers) do + layer:set_attr(name, value) + end +end + +function GraphLayer:get_sublayer(id) + return self.sublayer:get_layer(id) +end + +function GraphLayer:get_params() + local param_repos = {} + for id, layer in pairs(self.sublayer.layers) do + table.insert(param_repos, layer:get_params()) + end + return nerv.ParamRepo.merge(param_repos) +end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 54f33ae..5e3395c 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -70,6 +70,14 @@ function Layer:get_dim() return self.dim_in, self.dim_out end +function Layer:set_attr(name, value) + self[name] = value +end + +function Layer:get_sublayer(id) + nerv.error('primitive layer does not have sublayers.') +end + function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) if type(pid_list) == "string" then pid_list = {pid_list} @@ -101,6 +109,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) return p end +nerv.include('graph.lua') nerv.include('affine.lua') nerv.include('sigmoid.lua') nerv.include('tanh.lua') @@ -115,6 +124,7 @@ nerv.include('lstm.lua') nerv.include('lstm_gate.lua') nerv.include('dropout.lua') nerv.include('gru.lua') +nerv.include('rnn.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
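Connection endpoints throughout this patch series are strings of the form 'layer_id[port]', split by the Lua pattern in parse_id above. The following standalone sketch mirrors that parsing with only the standard string library; parse_endpoint and the sample endpoint names are invented for this illustration and are not part of the patch.

    -- sketch of the endpoint parsing used by GraphLayer connections
    local function parse_endpoint(str)
        -- captures the layer id (letters, digits, '_', '.') and the 1-based port
        local _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]")
        return id, tonumber(port)
    end

    print(parse_endpoint("main[1]"))       --> main      1
    print(parse_endpoint("rnn.main[2]"))   --> rnn.main  2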
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua new file mode 100644 index 0000000..a93530f --- /dev/null +++ b/nerv/layer/rnn.lua @@ -0,0 +1,37 @@ +local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer') + +function RNNLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = layer_conf.gconf + self:check_dim_len(1, 1) + + local din = layer_conf.dim_in[1] + local dout = layer_conf.dim_out[1] + + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo() + end + + local layers = { + ['nerv.AffineLayer'] = { + main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr}, + }, + ['nerv.SigmoidLayer'] = { + sigmoid = {dim_in = {dout}, dim_out = {dout}}, + }, + } + + local layer_repo = nerv.LayerRepo(layers, pr, global_conf) + + local connections = { + {'[1]', 'main[1]', 0}, + {'main[1]', 'sigmoid[1]', 0}, + {'sigmoid[1]', 'main[2]', 0}, + {'sigmoid[1]', '[1]', 1}, + } + + self:graph_init(layer_repo, connections) +end diff --git a/nerv/main.lua b/nerv/main.lua new file mode 100644 index 0000000..85e291c --- /dev/null +++ b/nerv/main.lua @@ -0,0 +1,31 @@ +print 'Hello' + +local global_conf = { + cumat_type = nerv.CuMatrixFloat, + param_random = function() return 0 end, +} + +local layer_repo = nerv.LayerRepo( + { + ['nerv.RNNLayer'] = { + rnn = {dim_in = {23}, dim_out = {26}}, + }, + ['nerv.AffineLayer'] = { + input = {dim_in = {20}, dim_out = {23}}, + output = {dim_in = {26, 79}, dim_out = {79}}, + }, + ['nerv.SigmoidLayer'] = { + sigmoid = {dim_in = {23}, dim_out = {23}}, + }, + }, nerv.ParamRepo(), global_conf) + +local connections = { + {'[1]', 'input[1]', 0}, + {'input[1]', 'sigmoid[1]', 0}, + {'sigmoid[1]', 'rnn[1]', 0}, + {'rnn[1]', 'output[1]', 0}, + {'output[1]', 'output[2]', 1}, + {'output[1]', '[1]', 0}, +} + +local network = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua index 3d3a79f..a169b2b 100644 --- a/nerv/nn/layer_repo.lua +++ b/nerv/nn/layer_repo.lua @@ -12,20 +12,18 @@ function LayerRepo:add_layers(layer_spec, param_repo, global_conf) if layer_type == nil then nerv.error('layer type `%s` not found', ltype) end - for id, spec in pairs(llist) do + for id, layer_config in pairs(llist) do if layers[id] ~= nil then nerv.error("a layer with id %s already exists", id) end nerv.info("create layer: %s", id) - if type(spec[2]) ~= "table" then + if type(layer_config) ~= "table" then nerv.error("layer config table is need") end - layer_config = spec[2] - if type(spec[1]) ~= "table" then - nerv.error("parameter description table is needed") - end - for pname, pid in pairs(spec[1]) do - layer_config[pname] = param_repo:get_param(pid) + if type(layer_config.params) == "table" then + for pname, pid in pairs(layer_config.params) do + layer_config[pname] = param_repo:get_param(pid) + end end if layer_config.pr == nil then layer_config.pr = param_repo -- cgit v1.2.3-70-g09d2 From 1a424bf9233f9b1c67ef135f1a3892b7986c5564 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 29 Feb 2016 22:05:43 +0800 Subject: add network & fix graph_layer --- nerv/Makefile | 2 +- nerv/layer/graph.lua | 46 ++++++++++++++++++++++------------- nerv/layer/rnn.lua | 4 ++-- nerv/main.lua | 4 +++- nerv/nn/init.lua | 1 + nerv/nn/network.lua | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 105 insertions(+), 20 deletions(-) create mode 100644 nerv/nn/network.lua 
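The nn/network.lua introduced below flattens nested graph layers into one list of primitive layers, and every flattened edge carries a time offset: when an edge crosses a sub-graph boundary, its own delay is added to the delay already recorded in the sub-graph's socket entry (the t + time arithmetic in network:compile). Here is a toy sketch of that bookkeeping in plain Lua; the helper name and all numbers are made up for illustration.

    -- toy sketch of delay composition across one level of nesting
    local function flatten_edge(outer_delay, socket_entry)
        -- socket_entry = {primitive_layer_id, port, delay_recorded_inside_subgraph}
        local id, port, t = socket_entry[1], socket_entry[2], socket_entry[3]
        return {id, port, t + outer_delay}
    end

    -- a recurrent edge with delay 1 whose destination socket recorded delay 0
    -- on primitive layer 3, port 2
    local edge = flatten_edge(1, {3, 2, 0})
    print(edge[1], edge[2], edge[3])   --> 3  2  1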
diff --git a/nerv/Makefile b/nerv/Makefile index ba97579..c9c3e42 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -35,7 +35,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ layer/graph.lua layer/rnn.lua \ - nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ + nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 83cf810..36a9672 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -21,20 +21,23 @@ local function parse_id(str) return id, port end -local function discover(id, layers, layer_repo) +function GraphLayer:discover(id, layer_repo) if id == '' then id = '' end + local layers = self.layers local ref = layers[id] if ref == nil then local layer = layer_repo:get_layer(id) local dim_in, dim_out = layer:get_dim() + self.layer_num = self.layer_num + 1 ref = { layer = layer, inputs = {}, outputs = {}, dim_in = dim_in, dim_out = dim_out, + id = self.layer_num, } layers[id] = ref end @@ -42,32 +45,37 @@ local function discover(id, layers, layer_repo) end function GraphLayer:graph_init(layer_repo, connections) - self.connections = connections - self.sublayer = nerv.LayerRepo({}, nerv.ParamRepo(), self.gconf) - - -- check data dimension between connected ports local layers = {} layers[''] = { inputs = {}, outputs = {}, dim_in = self.dim_out, dim_out = self.dim_in, + id = 0, } - for _, edge in pairs(self.connections) do - local from = edge[1] - local to = edge[2] + self.layers = layers + self.layer_num = 0 + self.connections = {} + + -- check data dimension between connected ports + for _, edge in pairs(connections) do + local from, to, time = edge[1], edge[2], edge[3] local id_from, port_from = parse_id(from) local id_to, port_to = parse_id(to) - local ref_from = discover(id_from, layers, layer_repo) - local ref_to = discover(id_to, layers, layer_repo) + local ref_from = self:discover(id_from, layer_repo) + local ref_to = self:discover(id_to, layer_repo) if ref_to.inputs[port_to] ~= nil then nerv.error('%s has already been attached', to) end if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then nerv.error('mismatching data dimension between %s and %s', from, to) end + if ref_from.id == 0 and ref_to.id == 0 then + nerv.error('short-circuit connection between and ') + end ref_from.outputs[port_from] = true ref_to.inputs[port_to] = true + table.insert(self.connections, {ref_from.id, port_from, ref_to.id, port_to, time}) end -- check dangling ports @@ -83,7 +91,6 @@ function GraphLayer:graph_init(layer_repo, connections) nerv.error('dangling output port %d os layer %s', i, id) end end - self.sublayer.layers[id] = ref.layer end end for i = 1, #self.dim_in do @@ -100,19 +107,26 @@ end function GraphLayer:set_attr(name, value) self[name] = value - for id, layer in pairs(self.sublayer.layers) do - layer:set_attr(name, value) + for id, ref in pairs(self.layers) do + if id ~= '' then + ref.layer:set_attr(name, value) + end end end function GraphLayer:get_sublayer(id) - return self.sublayer:get_layer(id) + if self.layers[id] == nil or id == '' then + nerv.error('layer with id %s not found', id) + end + return self.layers[id].layer end function GraphLayer:get_params() local param_repos = {} - for id, layer in pairs(self.sublayer.layers) do - 
table.insert(param_repos, layer:get_params()) + for id, ref in pairs(self.layers) do + if id ~= '' then + table.insert(param_repos, ref.layer:get_params()) + end end return nerv.ParamRepo.merge(param_repos) end diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index a93530f..8816891 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -29,8 +29,8 @@ function RNNLayer:__init(id, global_conf, layer_conf) local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'main[2]', 0}, - {'sigmoid[1]', '[1]', 1}, + {'sigmoid[1]', 'main[2]', 1}, + {'sigmoid[1]', '[1]', 0}, } self:graph_init(layer_repo, connections) diff --git a/nerv/main.lua b/nerv/main.lua index 85e291c..0633e87 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -28,4 +28,6 @@ local connections = { {'output[1]', '[1]', 0}, } -local network = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) + +local network = nerv.Network(graph) diff --git a/nerv/nn/init.lua b/nerv/nn/init.lua index cbaf52b..c32ea09 100644 --- a/nerv/nn/init.lua +++ b/nerv/nn/init.lua @@ -1,3 +1,4 @@ nerv.include('layer_repo.lua') nerv.include('param_repo.lua') nerv.include('layer_dag.lua') +nerv.include('network.lua') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua new file mode 100644 index 0000000..6cee08b --- /dev/null +++ b/nerv/nn/network.lua @@ -0,0 +1,68 @@ +local network = nerv.class('nerv.Network') + +function network:__init(graph) + self.layers = {} + self.socket = self:compile(graph) + for i = 1, #self.layers do + print(self.layers[i].layer.id) + local _, dim_out = self.layers[i].layer:get_dim() + for j = 1, #dim_out do + for k = 1, #self.layers[i].connections[j] do + local connections = self.layers[i].connections[j][k] + print(i, connections[1], connections[2], connections[3]) + end + end + end +end + +function network:compile(layer) + local socket = {inputs = {}, outputs = {}} + if not nerv.is_type(layer, 'nerv.GraphLayer') then + table.insert(self.layers, {layer = layer, connections = {}}) + local id = #self.layers + local dim_in, dim_out = layer:get_dim() + for i = 1, #dim_in do + socket.inputs[i] = {{id, i, 0}} + end + for i = 1, #dim_out do + socket.outputs[i] = {id, i, 0} + self.layers[id].connections[i] = {} + end + else + local sublayer_socket = {} + for id, sublayer in pairs(layer.layers) do + if id ~= '' then + sublayer_socket[sublayer.id] = self:compile(sublayer.layer) + end + end + local dim_in, _ = layer:get_dim() + for i = 1, #dim_in do + socket.inputs[i] = {} + end + for _, edge in pairs(layer.connections) do + -- id = 0 means or + local id_from, port_from = edge[1], edge[2] + local id_to, port_to = edge[3], edge[4] + local time = edge[5] + if id_from == 0 then + for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do + local id, port, t = input[1], input[2], input[3] + time + table.insert(socket.inputs[port_from], {id, port, t}) + end + else + local output = sublayer_socket[id_from].outputs[port_from] + local id, port, t = output[1], output[2], output[3] + time + if id_to == 0 then + socket.outputs[port_to] = {id, port, t} + else + local connections = self.layers[id].connections[port] + for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do + local id1, port1, t1 = input[1], input[2], input[3] + table.insert(connections, {id1, port1, t + t1}) + end + end + end 
+ end + end + return socket +end -- cgit v1.2.3-70-g09d2 From 2ea3e139af91eb894d904d7a956e28619b1a70f6 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 1 Mar 2016 20:00:53 +0800 Subject: network init complete --- nerv/Makefile | 2 +- nerv/layer/duplicate.lua | 40 ++++++ nerv/layer/graph.lua | 3 + nerv/layer/init.lua | 1 + nerv/layer/rnn.lua | 8 +- nerv/main.lua | 18 ++- nerv/nn/network.lua | 324 +++++++++++++++++++++++++++++++++++++++++++---- 7 files changed, 367 insertions(+), 29 deletions(-) create mode 100644 nerv/layer/duplicate.lua diff --git a/nerv/Makefile b/nerv/Makefile index c9c3e42..0a2aa86 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua \ + layer/graph.lua layer/rnn.lua layer/duplicate.lua\ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua new file mode 100644 index 0000000..58758e8 --- /dev/null +++ b/nerv/layer/duplicate.lua @@ -0,0 +1,40 @@ +local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer') + +function DuplicateLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(1, -1) + if #self.dim_out < 1 then + nerv.error('no output specified') + end + for i = 1, #self.dim_out do + if self.dim_out[i] ~= self.dim_in[1] then + nerv.error('mismatching dimensions of outputs') + end + end +end + +function DuplicateLayer:init(batch_size) +end + +function DuplicateLayer:batch_resize(batch_size) +end + +function DuplicateLayer:update(bp_err, input, output) +end + +function DuplicateLayer:propagate(input, output) + for i = 1, #self.dim_out do + output[i]:copy_from(input[1]) + -- FIXME: use reference copy to speed up + end +end + +function DuplicateLayer:back_propagate(bp_err, next_bp_err, input, output) + next_bp_err:copy_from(bp_err[1]) + for i = 2, #self.dim_out do + next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) + end +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 36a9672..d72d849 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -64,6 +64,9 @@ function GraphLayer:graph_init(layer_repo, connections) local id_to, port_to = parse_id(to) local ref_from = self:discover(id_from, layer_repo) local ref_to = self:discover(id_to, layer_repo) + if ref_from.outputs[port_from] ~= nil then + nerv.error('%s has already been attached', from) + end if ref_to.inputs[port_to] ~= nil then nerv.error('%s has already been attached', to) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 5e3395c..6f26d4d 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -125,6 +125,7 @@ nerv.include('lstm_gate.lua') nerv.include('dropout.lua') nerv.include('gru.lua') nerv.include('rnn.lua') +nerv.include('duplicate.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
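The network:topsort added to nn/network.lua later in this patch unrolls the graph over chunk_size time steps and runs a breadth-first (Kahn-style) topological sort over (time step, layer) pairs, so a delayed edge simply points at a later step and cannot create a cycle inside a single step. Below is a self-contained sketch of the same sort on a toy edge list; the graph and its size are invented for the illustration and none of the NERV structures are used.

    -- Kahn-style topological sort over a toy DAG of 4 nodes
    local n = 4
    local edges = { {1, 2}, {1, 3}, {2, 4}, {3, 4} }   -- {from, to}
    local degree, adj = {}, {}
    for i = 1, n do degree[i], adj[i] = 0, {} end
    for _, e in ipairs(edges) do
        table.insert(adj[e[1]], e[2])
        degree[e[2]] = degree[e[2]] + 1
    end
    local queue, l = {}, 1
    for i = 1, n do
        if degree[i] == 0 then table.insert(queue, i) end
    end
    while l <= #queue do
        local u = queue[l]
        l = l + 1
        for _, v in ipairs(adj[u]) do
            degree[v] = degree[v] - 1
            if degree[v] == 0 then table.insert(queue, v) end
        end
    end
    assert(#queue == n, 'loop detected')
    print(table.concat(queue, ' '))   --> 1 2 3 4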
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index 8816891..806ac58 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -22,6 +22,9 @@ function RNNLayer:__init(id, global_conf, layer_conf) ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {dout}, dim_out = {dout}}, }, + ['nerv.DuplicateLayer'] = { + dup = {dim_in = {dout}, dim_out = {dout, dout}}, + } } local layer_repo = nerv.LayerRepo(layers, pr, global_conf) @@ -29,8 +32,9 @@ function RNNLayer:__init(id, global_conf, layer_conf) local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'main[2]', 1}, - {'sigmoid[1]', '[1]', 0}, + {'sigmoid[1]', 'dup[1]', 0}, + {'dup[1]', 'main[2]', 1}, + {'dup[2]', '[1]', 0}, } self:graph_init(layer_repo, connections) diff --git a/nerv/main.lua b/nerv/main.lua index 0633e87..5cb7d07 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -17,6 +17,12 @@ local layer_repo = nerv.LayerRepo( ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {23}, dim_out = {23}}, }, + ['nerv.SoftmaxLayer'] = { + softmax = {dim_in = {79}, dim_out = {79}}, + }, + ['nerv.DuplicateLayer'] = { + dup = {dim_in = {79}, dim_out = {79, 79}}, + }, }, nerv.ParamRepo(), global_conf) local connections = { @@ -24,10 +30,14 @@ local connections = { {'input[1]', 'sigmoid[1]', 0}, {'sigmoid[1]', 'rnn[1]', 0}, {'rnn[1]', 'output[1]', 0}, - {'output[1]', 'output[2]', 1}, - {'output[1]', '[1]', 0}, + {'output[1]', 'dup[1]', 0}, + {'dup[1]', 'output[2]', -1}, + {'dup[2]', 'softmax[1]', 0}, + {'softmax[1]', '[1]', 0}, } -local graph = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) + +local network = nerv.Network('network', global_conf, {network = graph}) -local network = nerv.Network(graph) +network:init(2,5) diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 6cee08b..01290e7 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -1,15 +1,47 @@ local network = nerv.class('nerv.Network') -function network:__init(graph) +function network:__init(id, global_conf, network_conf) + self.id = id + self.dim_in = network_conf.network.dim_in + self.dim_out = network_conf.network.dim_out + self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end + self.clip = network_conf.clip + self.nn_act_default = network_conf.nn_act_default + if self.nn_act_default == nil then + self.nn_act_default = 0 + end self.layers = {} - self.socket = self:compile(graph) + self.input_conn = {} + self.output_conn = {} + self.socket = self:compile(network_conf.network) + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if self.input_conn[id][port] ~= nil then + nerv.error('duplicate edge') + end + self.input_conn[id][port] = {0, i, time} + end + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if self.output_conn[id][port] ~= nil then + nerv.error('duplicate edge') + end + self.output_conn[id][port] = {0, i, time} + end + self.delay = 0 for i = 1, #self.layers do - print(self.layers[i].layer.id) - local _, dim_out = self.layers[i].layer:get_dim() - for j = 1, #dim_out do - for k = 1, #self.layers[i].connections[j] do - local connections = self.layers[i].connections[j][k] - print(i, 
connections[1], connections[2], connections[3]) + local dim_in, _ = self.layers[i]:get_dim() + for j = 1, #dim_in do + local time = self.input_conn[i][j][3] + if math.abs(time) > self.delay then + self.delay = math.abs(time) end end end @@ -18,15 +50,16 @@ end function network:compile(layer) local socket = {inputs = {}, outputs = {}} if not nerv.is_type(layer, 'nerv.GraphLayer') then - table.insert(self.layers, {layer = layer, connections = {}}) + table.insert(self.layers, layer) local id = #self.layers + self.input_conn[id] = {} + self.output_conn[id] = {} local dim_in, dim_out = layer:get_dim() for i = 1, #dim_in do - socket.inputs[i] = {{id, i, 0}} + socket.inputs[i] = {id, i, 0} end for i = 1, #dim_out do socket.outputs[i] = {id, i, 0} - self.layers[id].connections[i] = {} end else local sublayer_socket = {} @@ -35,34 +68,281 @@ function network:compile(layer) sublayer_socket[sublayer.id] = self:compile(sublayer.layer) end end - local dim_in, _ = layer:get_dim() - for i = 1, #dim_in do - socket.inputs[i] = {} - end for _, edge in pairs(layer.connections) do -- id = 0 means or local id_from, port_from = edge[1], edge[2] local id_to, port_to = edge[3], edge[4] local time = edge[5] if id_from == 0 then - for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do - local id, port, t = input[1], input[2], input[3] + time - table.insert(socket.inputs[port_from], {id, port, t}) + if socket.inputs[port_from] ~= nil then + nerv.error('duplicate input socket') end + local input = sublayer_socket[id_to].inputs[port_to] + local id, port, t = input[1], input[2], input[3] + time + socket.inputs[port_from] = {id, port, t} else local output = sublayer_socket[id_from].outputs[port_from] local id, port, t = output[1], output[2], output[3] + time if id_to == 0 then + if socket.outputs[port_to] ~= nil then + nerv.error('duplicate output socket') + end socket.outputs[port_to] = {id, port, t} else - local connections = self.layers[id].connections[port] - for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do - local id1, port1, t1 = input[1], input[2], input[3] - table.insert(connections, {id1, port1, t + t1}) + local input = sublayer_socket[id_to].inputs[port_to] + local id1, port1, t1 = input[1], input[2], input[3] + if self.input_conn[id1][port1] ~= nil or self.output_conn[id][port] ~= nil then + nerv.error('duplicate edge') end + self.input_conn[id1][port1] = {id, port, t + t1} + self.output_conn[id][port] = {id1, port1, t + t1} end end end end return socket end + +function network:init(batch_size, chunk_size) + self.batch_size = batch_size + self.chunk_size = chunk_size + + self:topsort() + + self:make_initial_store() + collectgarbage('collect') +end + +function network:topsort() + nerv.info('Network topology sort') + local degree = {} + for t = 1, self.chunk_size do + degree[t] = {} + for i = 1, #self.layers do + degree[t][i] = 0 + end + end + + for t = 1, self.chunk_size do + for i = 1, #self.layers do + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + if self.output_conn[i][j] ~= nil then + local edge = self.output_conn[i][j] + local id, _, time = edge[1], edge[2], edge[3] + t + if time >= 1 and time <= self.chunk_size and id ~= 0 then + degree[time][id] = degree[time][id] + 1 + end + end + end + end + end + + self.queue = {} + local l = 1 + local r = 0 + for t = 1, self.chunk_size do + for i = 1, #self.layers do + if degree[t][i] == 0 then + r = r + 1 + self.queue[r] = {chunk = t, id = i} + end + end + end + while l<=r do + local t, i = self.queue[l].chunk, 
self.queue[l].id + l = l + 1 + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + if self.output_conn[i][j] ~= nil then + local edge = self.output_conn[i][j] + local id, _, time = edge[1], edge[2], edge[3] + t + if time >= 1 and time <= self.chunk_size and id ~= 0 then + degree[time][id] = degree[time][id] - 1 + if degree[time][id] == 0 then + r = r + 1 + self.queue[r] = {chunk = time, id = id} + end + end + end + end + end + + if r ~= self.chunk_size * #self.layers then + nerv.error('loop detected') + end +end + +function network:make_initial_store() + nerv.info('Network initing storage') + + -- allocate memory + local memory = {} + local err_memory = {} + for t = 1 - self.delay, self.chunk_size + self.delay do + memory[t] = {} + err_memory[t] = {} + for i = 1, #self.layers do + memory[t][i] = {} + err_memory[t][i] = {} + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + err_memory[t][i][j] = self.mat_type(self.batch_size, dim_in[j]) + err_memory[t][i][j]:fill(0) + end + for j = 1, #dim_out do + memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j]) + memory[t][i][j]:fill(self.nn_act_default) + end + end + -- memory[t][0] stores network input + memory[t][0] = {} + for j = 1, #self.dim_in do + memory[t][0][j] = self.mat_type(self.batch_size, self.dim_in[j]) + memory[t][0][j]:fill(self.nn_act_default) + end + -- err_memory[t][0] stores network err_input + err_memory[t][0] = {} + for j = 1, #self.dim_out do + err_memory[t][0][j] = self.mat_type(self.batch_size, self.dim_out[j]) + err_memory[t][0][j]:fill(0) + end + end + + -- connect memory and reference + self.input = {} + self.output = {} + self.err_input = {} + self.err_output = {} + for t = 1, self.chunk_size do + self.input[t] = {} + self.output[t] = {} + self.err_input[t] = {} + self.err_output[t] = {} + for i = 1, #self.layers do + self.input[t][i] = {} + self.output[t][i] = {} + self.err_input[t][i] = {} + self.err_output[t][i] = {} + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + local edge = self.input_conn[i][j] + local id, port, time = edge[1], edge[2], edge[3] + if id ~= 0 or t - time < 1 or t - time > self.chunk_size then + self.input[t][i][j] = memory[t - time][id][port] + end + if id ~= 0 then + self.err_output[t][i][j] = err_memory[t][i][j] + end + end + for j = 1, #dim_out do + local edge = self.output_conn[i][j] + local id, port, time = edge[1], edge[2], edge[3] + if id ~= 0 then + self.output[t][i][j] = memory[t][i][j] + end + if id ~= 0 or t + time < 1 or t + time > self.chunk_size then + self.err_input[t][i][j] = err_memory[t + time][id][port] + end + end + end + end + + -- check dangling reference + for t = 1, self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + if self.input[t + time][id][port] ~= nil then + nerv.error('input reference not nil') + end + self.input[t + time][id][port] = true -- just a place holder + if self.err_output[t + time][id][port] ~= nil then + nerv.error('err_output reference not nil') + end + self.err_output[t + time][id][port] = true -- just a place holder + end + end + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + if self.output[t - time][id][port] ~= nil then + nerv.error('output reference not nil') + end + self.output[t - time][id][port] = true -- 
just a place holder + if self.err_input[t - time][id][port] ~= nil then + nerv.error('err_output reference not nil') + end + self.err_input[t - time][id][port] = true -- just a place holder + end + end + end + for t = 1, self.chunk_size do + for i = 1, #self.layers do + local dim_in, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_in do + if self.input[t][i][j] == nil then + nerv.error('input reference dangling') + end + if self.err_output[t][i][j] == nil then + nerv.error('err_output reference dangling') + end + end + for j = 1, #dim_out do + if self.output[t][i][j] == nil then + nerv.error('output reference dangling') + end + if self.err_input[t][i][j] == nil then + nerv.error('err_input reference dangling') + end + end + end + end + + -- allocate reference for legacy of previous mini-batch + self.legacy = {} + for t = 1 - self.delay, 0 do + self.legacy[t] = {} + for i = 1, #self.layers do + self.legacy[t][i] = {} + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + self.legacy[t][i][j] = memory[t][i][j] + end + end + end +end + +function network:mini_batch_init(information) + self.info = information + self.max_chunk = 0 + for i = 1, self.batch_size do + if self.info.seq_length[i] > self.max_chunk then + self.max_chunk = self.info.seq_length[i] + end + end + for t = 1 - self.delay, 0 do + for i = 1, #self.layers do + local _, dim_out = self.layers[i]:get_dim() + for j = 1, #dim_out do + self.output[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + end + end + end + for t = self.max_chunk + 1, self.max_chunk + self.delay do + if t > self.chunk_size then + break + end + for i = 1, #self.layers do + local dim_in, _ = self.layers[i]:get_dim() + for j = 1, #dim_in do + self.err_output[t][i][j]:fill(0) + end + end + end +end + +function network:propagate(input, output) +end -- cgit v1.2.3-70-g09d2 From 31e575379fa46eb8f76f00ba62e11626ed67ca72 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 13:07:20 +0800 Subject: network complete --- nerv/nn/network.lua | 124 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 5 deletions(-) diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 01290e7..e1a9629 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -111,6 +111,10 @@ function network:init(batch_size, chunk_size) self:make_initial_store() collectgarbage('collect') + + for i = 1, #self.layers do + self.layers[i]:init(batch_size, chunk_size) + end end function network:topsort() @@ -315,23 +319,86 @@ function network:make_initial_store() end end +function network:set_input(input) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + self.input[t + time][id][port] = input[t][i] + end + end + end +end + +function network:set_output(output) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + self.output[t - time][id][port] = output[t][i] + end + end + end +end + +function network:set_err_input(err_input) + for t = 1, #self.chunk_size do + for i = 1, #self.dim_out do + local edge = self.socket.outputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t - time >= 1 and t - time <= self.chunk_size then + self.err_input[t - time][id][port] = err_input[t][i] + end + end + end +end + +function 
network:set_err_output(err_output) + for t = 1, self.chunk_size do + for i = 1, #self.dim_in do + local edge = self.socket.inputs[i] + local id, port, time = edge[1], edge[2], edge[3] + if t + time >= 1 and t + time <= self.chunk_size then + self.err_output[t + time][id][port] = err_output[t][i] + end + end + end +end + function network:mini_batch_init(information) self.info = information - self.max_chunk = 0 + self.max_length = 0 + self.border = {} + for i = 1, self.chunk_size do + self.border[i] = {} + end for i = 1, self.batch_size do - if self.info.seq_length[i] > self.max_chunk then - self.max_chunk = self.info.seq_length[i] + if self.info.seq_length[i] > self.max_length then + self.max_length = self.info.seq_length[i] + end + for t = 1, self.delay do + local chunk = self.info.seq_length[i] + t + if chunk > self.chunk_size then + break + end + table.insert(self.border[chunk], i) end end for t = 1 - self.delay, 0 do for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() for j = 1, #dim_out do - self.output[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + for k = 1, #self.info.new_seq do + local batch = self.info.new_seq[k] + self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default) + end end end end - for t = self.max_chunk + 1, self.max_chunk + self.delay do + for t = self.max_length + 1, self.max_length + self.delay do if t > self.chunk_size then break end @@ -345,4 +412,51 @@ function network:mini_batch_init(information) end function network:propagate(input, output) + network:set_input(input) + network:set_output(output) + for i = 1, #self.queue do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + self.layers[id]:propagate(self.input[t][id], self.output[t][id], t) + end + for j = 1, #self.border[t] do + local batch = self.border[t][j] + local _, dim_out = self.layers[id]:get_dim() + for k = 1, #dim_out do + self.output[t][id][k][batch - 1]:fill(self.nn_act_default) + end + end + end +end + +function network:back_propagate(bp_err, next_bp_err, input, output) + network:set_input(input) + network:set_output(output) + network:set_err_input(bp_err) + network:set_err_output(next_bp_err) + for i = #self.queue, 1, -1 do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + for j = 1, #self.border[t] do + local batch = self.border[t][j] + local dim_in, _ = self.layers[id]:get_dim() + for k = 1, #dim_in do + self.err_input[t][id][k][batch - 1]:fill(0) + end + end + self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t) + end + end +end + +function network:update(bp_err, input, output) + network:set_input(input) + network:set_output(output) + network:set_err_input(bp_err) + for i = 1, #self.queue do + local t, id = self.queue[i].chunk, self.queue[i].id + if t <= self.max_length then + self.layers[id]:update(self.err_input[t][id], self.input[t][id], self.output[t][id], t) + end + end end -- cgit v1.2.3-70-g09d2 From a87f8954c97cf633a0100c9108764bca8c43a083 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 15:38:55 +0800 Subject: add identity layer --- nerv/layer/duplicate.lua | 12 ++++++------ nerv/layer/identity.lua | 33 +++++++++++++++++++++++++++++++++ nerv/nn/network.lua | 18 ++++++++++++++---- 3 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 nerv/layer/identity.lua diff --git a/nerv/layer/duplicate.lua 
b/nerv/layer/duplicate.lua index 58758e8..fbd4a9e 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -16,13 +16,10 @@ function DuplicateLayer:__init(id, global_conf, layer_conf) end end -function DuplicateLayer:init(batch_size) +function DuplicateLayer:init() end -function DuplicateLayer:batch_resize(batch_size) -end - -function DuplicateLayer:update(bp_err, input, output) +function DuplicateLayer:batch_resize() end function DuplicateLayer:propagate(input, output) @@ -32,9 +29,12 @@ function DuplicateLayer:propagate(input, output) end end -function DuplicateLayer:back_propagate(bp_err, next_bp_err, input, output) +function DuplicateLayer:back_propagate(bp_err, next_bp_err) next_bp_err:copy_from(bp_err[1]) for i = 2, #self.dim_out do next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) end end + +function DuplicateLayer:update() +end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua new file mode 100644 index 0000000..dc796fb --- /dev/null +++ b/nerv/layer/identity.lua @@ -0,0 +1,33 @@ +local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer') + +function IdentityLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(1, 1) + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error('mismatching dimensions of input and output') + end +end + +function IdentityLayer:init() +end + +function IdentityLayer:batch_resize() +end + +function IdentityLayer:propagate(input, output) + output[1]:copy_from(input[1]) +end + +function IdentityLayer:back_propagate(bp_err, next_bp_err) + next_bp_err[1]:copy_from(bp_err) +end + +function IdentityLayer:update() +end + +function IdentityLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index e1a9629..3cf052b 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -118,7 +118,7 @@ function network:init(batch_size, chunk_size) end function network:topsort() - nerv.info('Network topology sort') + nerv.info('network topology sort') local degree = {} for t = 1, self.chunk_size do degree[t] = {} @@ -133,7 +133,7 @@ function network:topsort() for j = 1, #dim_out do if self.output_conn[i][j] ~= nil then local edge = self.output_conn[i][j] - local id, _, time = edge[1], edge[2], edge[3] + t + local id, time = edge[1], edge[3] + t if time >= 1 and time <= self.chunk_size and id ~= 0 then degree[time][id] = degree[time][id] + 1 end @@ -160,7 +160,7 @@ function network:topsort() for j = 1, #dim_out do if self.output_conn[i][j] ~= nil then local edge = self.output_conn[i][j] - local id, _, time = edge[1], edge[2], edge[3] + t + local id, time = edge[1], edge[3] + t if time >= 1 and time <= self.chunk_size and id ~= 0 then degree[time][id] = degree[time][id] - 1 if degree[time][id] == 0 then @@ -178,7 +178,7 @@ function network:topsort() end function network:make_initial_store() - nerv.info('Network initing storage') + nerv.info('network initing storage') -- allocate memory local memory = {} @@ -386,6 +386,7 @@ function network:mini_batch_init(information) table.insert(self.border[chunk], i) end end + -- copy legacy for t = 1 - self.delay, 0 do for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() @@ -398,6 +399,7 @@ function network:mini_batch_init(information) end end end + -- flush border gradient for t = self.max_length + 1, self.max_length + self.delay do if t > self.chunk_size then break @@ -419,6 +421,7 @@ function 
network:propagate(input, output) if t <= self.max_length then self.layers[id]:propagate(self.input[t][id], self.output[t][id], t) end + -- flush border activation for j = 1, #self.border[t] do local batch = self.border[t][j] local _, dim_out = self.layers[id]:get_dim() @@ -437,6 +440,7 @@ function network:back_propagate(bp_err, next_bp_err, input, output) for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then + -- flush border gradient for j = 1, #self.border[t] do local batch = self.border[t][j] local dim_in, _ = self.layers[id]:get_dim() @@ -445,6 +449,12 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end end self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t) + if self.clip ~= nil then + local dim_in, _ = self.layers[id]:get_dim() + for j = 1, #dim_in do + self.err_output[t][id][j]:clip(-self.clip, self.clip) + end + end end end end -- cgit v1.2.3-70-g09d2 From c682dfee8686c43aed8628633412c9b4d2bd708b Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 2 Mar 2016 16:43:47 +0800 Subject: fix bug --- nerv/Makefile | 2 +- nerv/layer/duplicate.lua | 4 ++-- nerv/layer/identity.lua | 2 +- nerv/layer/init.lua | 1 + nerv/main.lua | 36 ++++++++++++++++++++++++++++++------ nerv/nn/network.lua | 32 +++++++++++++++++--------------- 6 files changed, 52 insertions(+), 25 deletions(-) diff --git a/nerv/Makefile b/nerv/Makefile index 0a2aa86..a9b4baf 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua layer/duplicate.lua\ + layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index fbd4a9e..1a93b26 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -30,9 +30,9 @@ function DuplicateLayer:propagate(input, output) end function DuplicateLayer:back_propagate(bp_err, next_bp_err) - next_bp_err:copy_from(bp_err[1]) + next_bp_err[1]:copy_from(bp_err[1]) for i = 2, #self.dim_out do - next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) + next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0) end end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua index dc796fb..aeeff89 100644 --- a/nerv/layer/identity.lua +++ b/nerv/layer/identity.lua @@ -22,7 +22,7 @@ function IdentityLayer:propagate(input, output) end function IdentityLayer:back_propagate(bp_err, next_bp_err) - next_bp_err[1]:copy_from(bp_err) + next_bp_err[1]:copy_from(bp_err[1]) end function IdentityLayer:update() diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 6f26d4d..39f97b1 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -126,6 +126,7 @@ nerv.include('dropout.lua') nerv.include('gru.lua') nerv.include('rnn.lua') nerv.include('duplicate.lua') +nerv.include('identity.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. 
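The duplicate-layer fix above makes back_propagate accumulate into the single input-side buffer next_bp_err[1]: a fan-out node copies its input to every output port during propagation, so its backward pass must sum the gradients coming back from all consumers. A standalone illustration with plain Lua tables standing in for matrices; the values are invented and the NERV matrix API is not used.

    -- gradient accumulation at a fan-out (duplicate) node:
    --   d(input) = sum over output ports of d(output_i)
    local bp_err = { {0.1, -0.2}, {0.3, 0.5} }   -- gradients from two consumers
    local next_bp_err = {0, 0}                   -- gradient w.r.t. the single input
    for i = 1, #bp_err do
        for j = 1, #next_bp_err do
            next_bp_err[j] = next_bp_err[j] + bp_err[i][j]
        end
    end
    print(next_bp_err[1], next_bp_err[2])   --> 0.4  0.3 (up to rounding)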
diff --git a/nerv/main.lua b/nerv/main.lua index 5cb7d07..865aba0 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -1,8 +1,10 @@ -print 'Hello' - local global_conf = { cumat_type = nerv.CuMatrixFloat, param_random = function() return 0 end, + lrate = 0.1, + wcost = 0, + momentum = 0.9, + batch_size = 2, } local layer_repo = nerv.LayerRepo( @@ -11,13 +13,13 @@ local layer_repo = nerv.LayerRepo( rnn = {dim_in = {23}, dim_out = {26}}, }, ['nerv.AffineLayer'] = { - input = {dim_in = {20}, dim_out = {23}}, + input = {dim_in = {62}, dim_out = {23}}, output = {dim_in = {26, 79}, dim_out = {79}}, }, ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {23}, dim_out = {23}}, }, - ['nerv.SoftmaxLayer'] = { + ['nerv.IdentityLayer'] = { softmax = {dim_in = {79}, dim_out = {79}}, }, ['nerv.DuplicateLayer'] = { @@ -36,8 +38,30 @@ local connections = { {'softmax[1]', '[1]', 0}, } -local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {62}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) local network = nerv.Network('network', global_conf, {network = graph}) -network:init(2,5) +local batch = global_conf.batch_size +local chunk = 5 +network:init(batch, chunk) + +local input = {} +local output = {} +local err_input = {} +local err_output = {} +local input_size = 62 +local output_size = 79 +for i = 1, chunk do + input[i] = {global_conf.cumat_type(batch, input_size)} + output[i] = {global_conf.cumat_type(batch, output_size)} + err_input[i] = {global_conf.cumat_type(batch, output_size)} + err_output[i] = {global_conf.cumat_type(batch, input_size)} +end + +for i = 1, 100 do + network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}}) + network:propagate(input, output) + network:back_propagate(err_input, err_output, input, output) + network:update(err_input, input, output) +end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 3cf052b..0bbcc59 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -320,7 +320,7 @@ function network:make_initial_store() end function network:set_input(input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_in do local edge = self.socket.inputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -332,7 +332,7 @@ function network:set_input(input) end function network:set_output(output) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -344,7 +344,7 @@ function network:set_output(output) end function network:set_err_input(err_input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -391,7 +391,9 @@ function network:mini_batch_init(information) for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() for j = 1, #dim_out do - self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + if t + self.chunk_size >= 1 and self.output_conn[i][j][1] ~= 0 then + self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + end for k = 1, #self.info.new_seq do local batch = self.info.new_seq[k] self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default) @@ -414,8 +416,8 @@ function network:mini_batch_init(information) end function network:propagate(input, output) - 
network:set_input(input) - network:set_output(output) + self:set_input(input) + self:set_output(output) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then @@ -433,18 +435,18 @@ function network:propagate(input, output) end function network:back_propagate(bp_err, next_bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) - network:set_err_output(next_bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) + self:set_err_output(next_bp_err) for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- flush border gradient for j = 1, #self.border[t] do local batch = self.border[t][j] - local dim_in, _ = self.layers[id]:get_dim() - for k = 1, #dim_in do + local _, dim_out = self.layers[id]:get_dim() + for k = 1, #dim_out do self.err_input[t][id][k][batch - 1]:fill(0) end end @@ -460,9 +462,9 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end function network:update(bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- cgit v1.2.3-70-g09d2 From 8374e8fbc545633b6adf5c4090af8997a65778d2 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 3 Mar 2016 19:42:15 +0800 Subject: update add_prefix for graph layer --- nerv/layer/duplicate.lua | 4 ++++ nerv/layer/graph.lua | 24 ++++++++++++++++++++++++ nerv/layer/init.lua | 2 +- nerv/layer/rnn.lua | 4 ++-- nerv/main.lua | 12 +++++++++--- nerv/nn/network.lua | 15 ++++++++++++++- 6 files changed, 54 insertions(+), 7 deletions(-) diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index 1a93b26..8988617 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -38,3 +38,7 @@ end function DuplicateLayer:update() end + +function DuplicateLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index d72d849..1406eff 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -21,6 +21,30 @@ local function parse_id(str) return id, port end +function GraphLayer:add_prefix(layers, connections) + local function ap(name) + return self.id .. '.' .. 
name + end + + for layer_type, sublayers in pairs(layers) do + local tmp = {} + for name, layer_config in pairs(sublayers) do + tmp[ap(name)] = layer_config + end + layers[layer_type] = tmp + end + + for i = 1, #connections do + local from, to = connections[i][1], connections[i][2] + if parse_id(from) ~= '' then + connections[i][1] = ap(from) + end + if parse_id(to) ~= '' then + connections[i][2] = ap(to) + end + end +end + function GraphLayer:discover(id, layer_repo) if id == '' then id = '' diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 39f97b1..4fabefa 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -75,7 +75,7 @@ function Layer:set_attr(name, value) end function Layer:get_sublayer(id) - nerv.error('primitive layer does not have sublayers.') + nerv.error('primitive layer does not have sublayers') end function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index 806ac58..38f2326 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -27,8 +27,6 @@ function RNNLayer:__init(id, global_conf, layer_conf) } } - local layer_repo = nerv.LayerRepo(layers, pr, global_conf) - local connections = { {'[1]', 'main[1]', 0}, {'main[1]', 'sigmoid[1]', 0}, @@ -37,5 +35,7 @@ function RNNLayer:__init(id, global_conf, layer_conf) {'dup[2]', '[1]', 0}, } + self:add_prefix(layers, connections) + local layer_repo = nerv.LayerRepo(layers, pr, global_conf) self:graph_init(layer_repo, connections) end diff --git a/nerv/main.lua b/nerv/main.lua index 865aba0..7c82ebf 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -10,7 +10,8 @@ local global_conf = { local layer_repo = nerv.LayerRepo( { ['nerv.RNNLayer'] = { - rnn = {dim_in = {23}, dim_out = {26}}, + rnn1 = {dim_in = {23}, dim_out = {26}}, + rnn2 = {dim_in = {26}, dim_out = {26}}, }, ['nerv.AffineLayer'] = { input = {dim_in = {62}, dim_out = {23}}, @@ -30,8 +31,9 @@ local layer_repo = nerv.LayerRepo( local connections = { {'[1]', 'input[1]', 0}, {'input[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'rnn[1]', 0}, - {'rnn[1]', 'output[1]', 0}, + {'sigmoid[1]', 'rnn1[1]', 0}, + {'rnn1[1]', 'rnn2[1]', 0}, + {'rnn2[1]', 'output[1]', 0}, {'output[1]', 'dup[1]', 0}, {'dup[1]', 'output[2]', -1}, {'dup[2]', 'softmax[1]', 0}, @@ -65,3 +67,7 @@ for i = 1, 100 do network:back_propagate(err_input, err_output, input, output) network:update(err_input, input, output) end + +local tmp = network:get_params() + +tmp:export('../../workspace/test.param') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 0bbcc59..39df5f0 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -18,7 +18,8 @@ function network:__init(id, global_conf, network_conf) self.layers = {} self.input_conn = {} self.output_conn = {} - self.socket = self:compile(network_conf.network) + self.network = network_conf.network + self.socket = self:compile(self.network) for i = 1, #self.dim_in do local edge = self.socket.inputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -472,3 +473,15 @@ function network:update(bp_err, input, output) end end end + +function network:set_attr(name, value) + self.network:set_attr(name, value) +end + +function network:get_sublayer(id) + return self.network:get_sublayer(id) +end + +function network:get_params() + return self.network:get_params() +end -- cgit v1.2.3-70-g09d2 From 4e56b863203ab6919192efe973ba9f8ee0d5ac65 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 4 Mar 2016 19:47:33 +0800 Subject: update lstm --- nerv/layer/lstm.lua | 48 
++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua index 500bd87..caa7569 100644 --- a/nerv/layer/lstm.lua +++ b/nerv/layer/lstm.lua @@ -22,47 +22,47 @@ function LSTMLayer:__init(id, global_conf, layer_conf) local dout1, dout2, dout3 = self.dim_out[1], self.dim_out[2], self.dim_out[3] local layers = { ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {dim_in = {din1}, + [ap("inputXDup")] = {dim_in = {din1}, dim_out = {din1, din1, din1, din1}, - lambda = {1}}}, + lambda = {1}}, - [ap("inputHDup")] = {{}, {dim_in = {din2}, + [ap("inputHDup")] = {dim_in = {din2}, dim_out = {din2, din2, din2, din2}, - lambda = {1}}}, + lambda = {1}}, - [ap("inputCDup")] = {{}, {dim_in = {din3}, + [ap("inputCDup")] = {dim_in = {din3}, dim_out = {din3, din3, din3}, - lambda = {1}}}, + lambda = {1}}, - [ap("mainCDup")] = {{}, {dim_in = {din3, din3}, + [ap("mainCDup")] = {dim_in = {din3, din3}, dim_out = {din3, din3, din3}, - lambda = {1, 1}}}, + lambda = {1, 1}}, }, ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {dim_in = {din1, din2}, + [ap("mainAffineL")] = {dim_in = {din1, din2}, dim_out = {dout1}, - pr = pr}}, + pr = pr}, }, ["nerv.TanhLayer"] = { - [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}}, - [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}}, + [ap("mainTanhL")] = {dim_in = {dout1}, dim_out = {dout1}}, + [ap("outputTanhL")] = {dim_in = {dout1}, dim_out = {dout1}}, }, ["nerv.LSTMGateLayer"] = { - [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, - [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, - [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3}, - dim_out = {din3}, pr = pr}}, + [ap("forgetGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, + [ap("inputGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, + [ap("outputGateL")] = {dim_in = {din1, din2, din3}, + dim_out = {din3}, pr = pr}, }, ["nerv.ElemMulLayer"] = { - [ap("inputGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, - [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, - [ap("outputGMulL")] = {{}, {dim_in = {din3, din3}, - dim_out = {din3}}}, + [ap("inputGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, + [ap("forgetGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, + [ap("outputGMulL")] = {dim_in = {din3, din3}, + dim_out = {din3}}, }, } -- cgit v1.2.3-70-g09d2 From 05fcde5bf0caa1ceb70fef02fc88eda6f00c5ed5 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 9 Mar 2016 11:58:13 +0800 Subject: add recipe --- lua/config.lua | 67 +++++++++++++++++++++++++ lua/main.lua | 43 ++++++++++++++++ lua/network.lua | 109 ++++++++++++++++++++++++++++++++++++++++ lua/reader.lua | 112 +++++++++++++++++++++++++++++++++++++++++ lua/select_linear.lua | 62 +++++++++++++++++++++++ lua/timer.lua | 33 ++++++++++++ lua/tnn.lua | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 562 insertions(+) create mode 100644 lua/config.lua create mode 100644 lua/main.lua create mode 100644 lua/network.lua create mode 100644 lua/reader.lua create mode 100644 lua/select_linear.lua create mode 100644 lua/timer.lua create mode 100644 lua/tnn.lua diff --git a/lua/config.lua b/lua/config.lua new file mode 100644 index 0000000..9d73b64 --- /dev/null +++ b/lua/config.lua @@ -0,0 +1,67 @@ +function get_global_conf() + local global_conf = { + lrate = 0.15, + wcost 
= 1e-5, + momentum = 0, + clip = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + vocab_size = 10000, + nn_act_default = 0, + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 20, + max_iter = 1, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout = 0.5, + timer = nerv.Timer(), + pr = nerv.ParamRepo(), + } + return global_conf +end + +function get_layers(global_conf) + local pr = global_conf.pr + local layers = { + ['nerv.LSTMLayer'] = {}, + ['nerv.DropoutLayer'] = {}, + ['nerv.SelectLinearLayer'] = { + ['select'] = {dim_in = {1}, dim_out = {global_conf.hidden_size}, vocab = global_conf.vocab_size, pr = pr}, + }, + ['nerv.CombinerLayer'] = {}, + ['nerv.AffineLayer'] = { + output = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.vocab_size}, pr = pr} + }, + ['nerv.SoftmaxCELayer'] = { + softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}}, + }, + } + for i = 1, global_conf.layer_num do + layers['nerv.LSTMLayer']['lstm' .. i] = {dim_in = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, pr = pr} + layers['nerv.DropoutLayer']['dropout' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size}} + layers['nerv.CombinerLayer']['dup' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, lambda = {1}} + end + return layers +end + +function get_connections(global_conf) + local connections = { + {'[1]', 'select[1]', 0}, + {'select[1]', 'lstm1[1]', 0}, + {'dropout' .. global_conf.layer_num .. '[1]', 'output[1]', 0}, + {'output[1]', 'softmax[1]', 0}, + {'[2]', 'softmax[2]', 0}, + {'softmax[1]', '[1]', 0}, + } + for i = 1, global_conf.layer_num do + table.insert(connections, {'lstm' .. i .. '[1]', 'dup' .. i .. '[1]', 0}) + table.insert(connections, {'lstm' .. i .. '[2]', 'lstm' .. i .. '[3]', 1}) + table.insert(connections, {'dup' .. i .. '[1]', 'lstm' .. i .. '[2]', 1}) + table.insert(connections, {'dup' .. i .. '[2]', 'dropout' .. i .. '[1]', 0}) + if i > 1 then + table.insert(connections, {'dropout' .. (i - 1) .. '[1]', 'lstm' .. i .. '[1]', 0}) + end + end + return connections +end diff --git a/lua/main.lua b/lua/main.lua new file mode 100644 index 0000000..684efac --- /dev/null +++ b/lua/main.lua @@ -0,0 +1,43 @@ +nerv.include('reader.lua') +nerv.include('timer.lua') +nerv.include('config.lua') +nerv.include(arg[1]) + +local global_conf = get_global_conf() +local timer = global_conf.timer + +timer:tic('IO') + +local data_path = 'nerv/nerv/examples/lmptb/PTBdata/' +local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.train.txt.adds') +local val_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds') + +local train_data = train_reader:get_all_batch(global_conf) +local val_data = val_reader:get_all_batch(global_conf) + +local layers = get_layers(global_conf) +local connections = get_connections(global_conf) + +local NN = nerv.NN(global_conf, train_data, val_data, layers, connections) + +timer:toc('IO') +timer:check('IO') +io.flush() + +timer:tic('global') +local best_cv = 1e10 +for i = 1, global_conf.max_iter do + timer:tic('Epoch' .. i) + local train_ppl, val_ppl = NN:epoch() + if val_ppl < best_cv then + best_cv = val_ppl + else + global_conf.lrate = global_conf.lrate / 2.0 + end + nerv.printf('Epoch %d: %f %f %f\n', i, global_conf.lrate, train_ppl, val_ppl) + timer:toc('Epoch' .. 
i) + timer:check('Epoch' .. i) + io.flush() +end +timer:toc('global') +timer:check('global') diff --git a/lua/network.lua b/lua/network.lua new file mode 100644 index 0000000..6280f24 --- /dev/null +++ b/lua/network.lua @@ -0,0 +1,109 @@ +nerv.include('select_linear.lua') + +local nn = nerv.class('nerv.NN') + +function nn:__init(global_conf, train_data, val_data, layers, connections) + self.gconf = global_conf + self.network = self:get_network(layers, connections) + self.train_data = self:get_data(train_data) + self.val_data = self:get_data(val_data) +end + +function nn:get_network(layers, connections) + self.gconf.dropout_rate = 0 + local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf) + local graph = nerv.GraphLayer('graph', self.gconf, + {dim_in = {1, self.gconf.vocab_size}, dim_out = {1}, + layer_repo = layer_repo, connections = connections}) + local network = nerv.Network('network', self.gconf, + {network = graph, clip = self.gconf.clip}) + network:init(self.gconf.batch_size, self.gconf.chunk_size) + return network +end + +function nn:get_data(data) + local err_output = {} + local softmax_output = {} + local output = {} + for i = 1, self.gconf.chunk_size do + err_output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1) + softmax_output[i] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab_size) + output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1) + end + local ret = {} + for i = 1, #data do + ret[i] = {} + ret[i].input = {} + ret[i].output = {} + ret[i].err_input = {} + ret[i].err_output = {} + for t = 1, self.gconf.chunk_size do + ret[i].input[t] = {} + ret[i].output[t] = {} + ret[i].err_input[t] = {} + ret[i].err_output[t] = {} + ret[i].input[t][1] = data[i].input[t] + ret[i].input[t][2] = data[i].output[t] + ret[i].output[t][1] = output[t] + local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1) + for j = 1, self.gconf.batch_size do + if t <= data[i].seq_len[j] then + err_input[j - 1][0] = 1 + else + err_input[j - 1][0] = 0 + end + end + ret[i].err_input[t][1] = self.gconf.cumat_type.new_from_host(err_input) + ret[i].err_output[t][1] = err_output[t] + ret[i].err_output[t][2] = softmax_output[t] + end + ret[i].info = {} + ret[i].info.seq_length = data[i].seq_len + ret[i].info.new_seq = {} + for j = 1, self.gconf.batch_size do + if data[i].seq_start[j] then + table.insert(ret[i].info.new_seq, j) + end + end + end + return ret +end + +function nn:process(data, do_train) + local total_err = 0 + local total_frame = 0 + for id = 1, #data do + if do_train then + self.gconf.dropout_rate = self.gconf.dropout + else + self.gconf.dropout_rate = 0 + end + self.network:mini_batch_init(data[id].info) + local input = {} + for t = 1, self.gconf.chunk_size do + input[t] = {data[id].input[t][1], data[id].input[t][2]:decompress(self.gconf.vocab_size)} + end + self.network:propagate(input, data[id].output) + for t = 1, self.gconf.chunk_size do + local tmp = data[id].output[t][1]:new_to_host() + for i = 1, self.gconf.batch_size do + if t <= data[id].info.seq_length[i] then + total_err = total_err + math.log10(math.exp(tmp[i - 1][0])) + total_frame = total_frame + 1 + end + end + end + if do_train then + self.network:back_propagate(data[id].err_input, data[id].err_output, input, data[id].output) + self.network:update(data[id].err_input, input, data[id].output) + end + collectgarbage('collect') + end + return math.pow(10, - total_err / total_frame) +end + +function nn:epoch() + local train_error = self:process(self.train_data, true) + local val_error = 
self:process(self.val_data, false) + return train_error, val_error +end diff --git a/lua/reader.lua b/lua/reader.lua new file mode 100644 index 0000000..2e51a9c --- /dev/null +++ b/lua/reader.lua @@ -0,0 +1,112 @@ +local Reader = nerv.class('nerv.Reader') + +function Reader:__init(vocab_file, input_file) + self:get_vocab(vocab_file) + self:get_seq(input_file) +end + +function Reader:get_vocab(vocab_file) + local f = io.open(vocab_file, 'r') + local id = 0 + self.vocab = {} + while true do + local word = f:read() + if word == nil then + break + end + self.vocab[word] = id + id = id + 1 + end + self.size = id +end + +function Reader:split(s, t) + local ret = {} + for x in (s .. t):gmatch('(.-)' .. t) do + table.insert(ret, x) + end + return ret +end + +function Reader:get_seq(input_file) + local f = io.open(input_file, 'r') + self.seq = {} + while true do + local seq = f:read() + if seq == nil then + break + end + seq = self:split(seq, ' ') + local tmp = {} + for i = 1, #seq do + if seq[i] ~= '' then + table.insert(tmp, self.vocab[seq[i]]) + end + end + table.insert(self.seq, tmp) + end +end + +function Reader:get_in_out(id, pos) + return self.seq[id][pos], self.seq[id][pos + 1], pos + 1 == #self.seq[id] +end + +function Reader:get_all_batch(global_conf) + local data = {} + local pos = {} + local offset = 1 + for i = 1, global_conf.batch_size do + pos[i] = nil + end + while true do + local input = {} + local output = {} + for i = 1, global_conf.chunk_size do + input[i] = global_conf.mmat_type(global_conf.batch_size, 1) + input[i]:fill(global_conf.nn_act_default) + output[i] = global_conf.mmat_type(global_conf.batch_size, 1) + output[i]:fill(global_conf.nn_act_default) + end + local seq_start = {} + local seq_end = {} + local seq_len = {} + for i = 1, global_conf.batch_size do + seq_start[i] = false + seq_end[i] = false + seq_len[i] = 0 + end + local has_new = false + for i = 1, global_conf.batch_size do + if pos[i] == nil then + if offset < #self.seq then + seq_start[i] = true + pos[i] = {offset, 1} + offset = offset + 1 + end + end + if pos[i] ~= nil then + has_new = true + for j = 1, global_conf.chunk_size do + local final + input[j][i-1][0], output[j][i-1][0], final = self:get_in_out(pos[i][1], pos[i][2]) + seq_len[i] = j + if final then + seq_end[i] = true + pos[i] = nil + break + end + pos[i][2] = pos[i][2] + 1 + end + end + end + if not has_new then + break + end + for i = 1, global_conf.chunk_size do + input[i] = global_conf.cumat_type.new_from_host(input[i]) + output[i] = global_conf.cumat_type.new_from_host(output[i]) + end + table.insert(data, {input = input, output = output, seq_start = seq_start, seq_end = seq_end, seq_len = seq_len}) + end + return data +end diff --git a/lua/select_linear.lua b/lua/select_linear.lua new file mode 100644 index 0000000..a7e20cc --- /dev/null +++ b/lua/select_linear.lua @@ -0,0 +1,62 @@ +local SL = nerv.class('nerv.SelectLinearLayer', 'nerv.Layer') + +--id: string +--global_conf: table +--layer_conf: table +--Get Parameters +function SL:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + + self.vocab = layer_conf.vocab + self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.vocab, self.dim_out[1]}) --layer_conf.ltp + + self:check_dim_len(1, 1) +end + +--Check parameter +function SL:init(batch_size) + if (self.dim_in[1] ~= 1) then --one word id + nerv.error("mismatching dimensions of ltp and input") + end + if 
(self.dim_out[1] ~= self.ltp.trans:ncol()) then + nerv.error("mismatching dimensions of ltp and output") + end + + self.batch_size = batch_size + self.ltp:train_init() +end + +function SL:update(bp_err, input, output) + --use this to produce a reproducible result; don't forget to set the dropout to zero! + --for i = 1, input[1]:nrow(), 1 do + -- local word_vec = self.ltp.trans[input[1][i - 1][0]] + -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size) + --end + + --I tried the update_select_rows kernel which uses atomicAdd, but it generates unreproducible results + self.ltp.trans:update_select_rows_by_colidx(bp_err[1], input[1], - self.gconf.lrate / self.gconf.batch_size, 0) + self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost) +end + +function SL:propagate(input, output) + --for i = 0, input[1]:ncol() - 1, 1 do + -- if (input[1][0][i] > 0) then + -- output[1][i]:copy_fromd(self.ltp.trans[input[1][0][i]]) + -- else + -- output[1][i]:fill(0) + -- end + --end + output[1]:copy_rows_fromd_by_colidx(self.ltp.trans, input[1]) +end + +function SL:back_propagate(bp_err, next_bp_err, input, output) + --input is compressed, do nothing +end + +function SL:get_params() + local paramRepo = nerv.ParamRepo({self.ltp}) + return paramRepo +end diff --git a/lua/timer.lua b/lua/timer.lua new file mode 100644 index 0000000..2c54ca8 --- /dev/null +++ b/lua/timer.lua @@ -0,0 +1,33 @@ +local Timer = nerv.class("nerv.Timer") + +function Timer:__init() + self.last = {} + self.rec = {} +end + +function Timer:tic(item) + self.last[item] = os.clock() +end + +function Timer:toc(item) + if (self.last[item] == nil) then + nerv.error("item not there") + end + if (self.rec[item] == nil) then + self.rec[item] = 0 + end + self.rec[item] = self.rec[item] + os.clock() - self.last[item] +end + +function Timer:check(item) + if self.rec[item]==nil then + nerv.error('item not there') + end + nerv.printf('"%s" lasts for %f secs.\n',item,self.rec[item]) +end + +function Timer:flush() + for key, value in pairs(self.rec) do + self.rec[key] = nil + end +end diff --git a/lua/tnn.lua b/lua/tnn.lua new file mode 100644 index 0000000..bf9f118 --- /dev/null +++ b/lua/tnn.lua @@ -0,0 +1,136 @@ +nerv.include('select_linear.lua') + +local reader = nerv.class('nerv.TNNReader') + +function reader:__init(global_conf, data) + self.gconf = global_conf + self.offset = 0 + self.data = data +end + +function reader:get_batch(feeds) + self.offset = self.offset + 1 + if self.offset > #self.data then + return false + end + for i = 1, self.gconf.chunk_size do + feeds.inputs_m[i][1]:copy_from(self.data[self.offset].input[i]) + feeds.inputs_m[i][2]:copy_from(self.data[self.offset].output[i]:decompress(self.gconf.vocab_size)) + end + feeds.flags_now = self.data[self.offset].flags + feeds.flagsPack_now = self.data[self.offset].flagsPack + return true +end + +function reader:has_data(t, i) + return t <= self.data[self.offset].seq_len[i] +end + +function reader:get_err_input() + return self.data[self.offset].err_input +end + +local nn = nerv.class('nerv.NN') + +function nn:__init(global_conf, train_data, val_data, layers, connections) + self.gconf = global_conf + self.tnn = self:get_tnn(layers, connections) + self.train_data = self:get_data(train_data) + self.val_data = self:get_data(val_data) +end + +function nn:get_tnn(layers, connections) + self.gconf.dropout_rate = 0 + local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf) + local tnn = nerv.TNN('TNN', self.gconf, 
{dim_in = {1, self.gconf.vocab_size}, + dim_out = {1}, sub_layers = layer_repo, connections = connections, + clip = self.gconf.clip}) + tnn:init(self.gconf.batch_size, self.gconf.chunk_size) + return tnn +end + +function nn:get_data(data) + local ret = {} + for i = 1, #data do + ret[i] = {} + ret[i].input = data[i].input + ret[i].output = data[i].output + ret[i].flags = {} + ret[i].err_input = {} + for t = 1, self.gconf.chunk_size do + ret[i].flags[t] = {} + local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1) + for j = 1, self.gconf.batch_size do + if t <= data[i].seq_len[j] then + ret[i].flags[t][j] = nerv.TNN.FC.SEQ_NORM + err_input[j - 1][0] = 1 + else + ret[i].flags[t][j] = 0 + err_input[j - 1][0] = 0 + end + end + ret[i].err_input[t] = self.gconf.cumat_type.new_from_host(err_input) + end + for j = 1, self.gconf.batch_size do + if data[i].seq_start[j] then + ret[i].flags[1][j] = bit.bor(ret[i].flags[1][j], nerv.TNN.FC.SEQ_START) + end + if data[i].seq_end[j] then + local t = data[i].seq_len[j] + ret[i].flags[t][j] = bit.bor(ret[i].flags[t][j], nerv.TNN.FC.SEQ_END) + end + end + ret[i].flagsPack = {} + for t = 1, self.gconf.chunk_size do + ret[i].flagsPack[t] = 0 + for j = 1, self.gconf.batch_size do + ret[i].flagsPack[t] = bit.bor(ret[i].flagsPack[t], ret[i].flags[t][j]) + end + end + ret[i].seq_len = data[i].seq_len + end + return ret +end + +function nn:process(data, do_train) + local total_err = 0 + local total_frame = 0 + local reader = nerv.TNNReader(self.gconf, data) + while true do + local r, _ = self.tnn:getfeed_from_reader(reader) + if not r then + break + end + if do_train then + self.gconf.dropout_rate = self.gconf.dropout + else + self.gconf.dropout_rate = 0 + end + self.tnn:net_propagate() + for t = 1, self.gconf.chunk_size do + local tmp = self.tnn.outputs_m[t][1]:new_to_host() + for i = 1, self.gconf.batch_size do + if reader:has_data(t, i) then + total_err = total_err + math.log10(math.exp(tmp[i - 1][0])) + total_frame = total_frame + 1 + end + end + end + if do_train then + local err_input = reader:get_err_input() + for i = 1, self.gconf.chunk_size do + self.tnn.err_inputs_m[i][1]:copy_from(err_input[i]) + end + self.tnn:net_backpropagate(false) + self.tnn:net_backpropagate(true) + end + collectgarbage('collect') + end + return math.pow(10, - total_err / total_frame) +end + +function nn:epoch() + local train_error = self:process(self.train_data, true) + local val_error = self:process(self.val_data, false) + return train_error, val_error +end -- cgit v1.2.3-70-g09d2 From f26288ba61d3d16866e1b227a71e7d9c46923436 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 11 Mar 2016 13:32:00 +0800 Subject: update mini_batch_init --- lua/config.lua | 4 +-- lua/main.lua | 4 ++- lua/network.lua | 30 ++++++++++++---------- lua/reader.lua | 3 ++- nerv/main.lua | 73 ----------------------------------------------------- nerv/nn/network.lua | 63 ++++++++++++++++++++++++++------------------- 6 files changed, 61 insertions(+), 116 deletions(-) delete mode 100644 nerv/main.lua diff --git a/lua/config.lua b/lua/config.lua index 9d73b64..1ec1198 100644 --- a/lua/config.lua +++ b/lua/config.lua @@ -12,7 +12,7 @@ function get_global_conf() layer_num = 1, chunk_size = 15, batch_size = 20, - max_iter = 1, + max_iter = 3, param_random = function() return (math.random() / 5 - 0.1) end, dropout = 0.5, timer = nerv.Timer(), @@ -34,7 +34,7 @@ function get_layers(global_conf) output = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.vocab_size}, pr = pr} }, ['nerv.SoftmaxCELayer'] 
= { - softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}}, + softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}, compressed = true}, }, } for i = 1, global_conf.layer_num do diff --git a/lua/main.lua b/lua/main.lua index 684efac..ce0270a 100644 --- a/lua/main.lua +++ b/lua/main.lua @@ -9,7 +9,7 @@ local timer = global_conf.timer timer:tic('IO') local data_path = 'nerv/nerv/examples/lmptb/PTBdata/' -local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.train.txt.adds') +local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds') local val_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds') local train_data = train_reader:get_all_batch(global_conf) @@ -41,3 +41,5 @@ for i = 1, global_conf.max_iter do end timer:toc('global') timer:check('global') +timer:check('network') +timer:check('gc') diff --git a/lua/network.lua b/lua/network.lua index 6280f24..0c11321 100644 --- a/lua/network.lua +++ b/lua/network.lua @@ -57,12 +57,11 @@ function nn:get_data(data) ret[i].err_output[t][1] = err_output[t] ret[i].err_output[t][2] = softmax_output[t] end - ret[i].info = {} - ret[i].info.seq_length = data[i].seq_len - ret[i].info.new_seq = {} + ret[i].seq_length = data[i].seq_len + ret[i].new_seq = {} for j = 1, self.gconf.batch_size do if data[i].seq_start[j] then - table.insert(ret[i].info.new_seq, j) + table.insert(ret[i].new_seq, j) end end end @@ -70,34 +69,39 @@ function nn:get_data(data) end function nn:process(data, do_train) + local timer = self.gconf.timer local total_err = 0 local total_frame = 0 for id = 1, #data do if do_train then self.gconf.dropout_rate = self.gconf.dropout + data[id].do_train = true else self.gconf.dropout_rate = 0 + data[id].do_train = false end - self.network:mini_batch_init(data[id].info) - local input = {} - for t = 1, self.gconf.chunk_size do - input[t] = {data[id].input[t][1], data[id].input[t][2]:decompress(self.gconf.vocab_size)} - end - self.network:propagate(input, data[id].output) + timer:tic('network') + self.network:mini_batch_init(data[id]) + self.network:propagate() + timer:toc('network') for t = 1, self.gconf.chunk_size do local tmp = data[id].output[t][1]:new_to_host() for i = 1, self.gconf.batch_size do - if t <= data[id].info.seq_length[i] then + if t <= data[id].seq_length[i] then total_err = total_err + math.log10(math.exp(tmp[i - 1][0])) total_frame = total_frame + 1 end end end if do_train then - self.network:back_propagate(data[id].err_input, data[id].err_output, input, data[id].output) - self.network:update(data[id].err_input, input, data[id].output) + timer:tic('network') + self.network:back_propagate() + self.network:update() + timer:toc('network') end + timer:tic('gc') collectgarbage('collect') + timer:toc('gc') end return math.pow(10, - total_err / total_frame) end diff --git a/lua/reader.lua b/lua/reader.lua index 2e51a9c..0c7bcb6 100644 --- a/lua/reader.lua +++ b/lua/reader.lua @@ -58,7 +58,8 @@ function Reader:get_all_batch(global_conf) for i = 1, global_conf.batch_size do pos[i] = nil end - while true do + --while true do + for i = 1, 100 do local input = {} local output = {} for i = 1, global_conf.chunk_size do diff --git a/nerv/main.lua b/nerv/main.lua deleted file mode 100644 index 7c82ebf..0000000 --- a/nerv/main.lua +++ /dev/null @@ -1,73 +0,0 @@ -local global_conf = { - cumat_type = nerv.CuMatrixFloat, - param_random = function() return 0 end, - lrate = 0.1, - wcost = 0, - momentum = 0.9, - 
batch_size = 2, -} - -local layer_repo = nerv.LayerRepo( - { - ['nerv.RNNLayer'] = { - rnn1 = {dim_in = {23}, dim_out = {26}}, - rnn2 = {dim_in = {26}, dim_out = {26}}, - }, - ['nerv.AffineLayer'] = { - input = {dim_in = {62}, dim_out = {23}}, - output = {dim_in = {26, 79}, dim_out = {79}}, - }, - ['nerv.SigmoidLayer'] = { - sigmoid = {dim_in = {23}, dim_out = {23}}, - }, - ['nerv.IdentityLayer'] = { - softmax = {dim_in = {79}, dim_out = {79}}, - }, - ['nerv.DuplicateLayer'] = { - dup = {dim_in = {79}, dim_out = {79, 79}}, - }, - }, nerv.ParamRepo(), global_conf) - -local connections = { - {'[1]', 'input[1]', 0}, - {'input[1]', 'sigmoid[1]', 0}, - {'sigmoid[1]', 'rnn1[1]', 0}, - {'rnn1[1]', 'rnn2[1]', 0}, - {'rnn2[1]', 'output[1]', 0}, - {'output[1]', 'dup[1]', 0}, - {'dup[1]', 'output[2]', -1}, - {'dup[2]', 'softmax[1]', 0}, - {'softmax[1]', '[1]', 0}, -} - -local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {62}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) - -local network = nerv.Network('network', global_conf, {network = graph}) - -local batch = global_conf.batch_size -local chunk = 5 -network:init(batch, chunk) - -local input = {} -local output = {} -local err_input = {} -local err_output = {} -local input_size = 62 -local output_size = 79 -for i = 1, chunk do - input[i] = {global_conf.cumat_type(batch, input_size)} - output[i] = {global_conf.cumat_type(batch, output_size)} - err_input[i] = {global_conf.cumat_type(batch, output_size)} - err_output[i] = {global_conf.cumat_type(batch, input_size)} -end - -for i = 1, 100 do - network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}}) - network:propagate(input, output) - network:back_propagate(err_input, err_output, input, output) - network:update(err_input, input, output) -end - -local tmp = network:get_params() - -tmp:export('../../workspace/test.param') diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 39df5f0..35e11e3 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -2,8 +2,9 @@ local network = nerv.class('nerv.Network') function network:__init(id, global_conf, network_conf) self.id = id - self.dim_in = network_conf.network.dim_in - self.dim_out = network_conf.network.dim_out + self.network = network_conf.network + self.dim_in = self.network.dim_in + self.dim_out = self.network.dim_out self.gconf = global_conf if self.gconf.use_cpu then self.mat_type = self.gconf.mmat_type @@ -18,7 +19,6 @@ function network:__init(id, global_conf, network_conf) self.layers = {} self.input_conn = {} self.output_conn = {} - self.network = network_conf.network self.socket = self:compile(self.network) for i = 1, #self.dim_in do local edge = self.socket.inputs[i] @@ -368,8 +368,21 @@ function network:set_err_output(err_output) end end -function network:mini_batch_init(information) - self.info = information +--[[ + [info] is a table that contains information of current mini-batch. 
These fields must be present: + [input], [output] : matrix arrays which store the network input and output + [seq_length] : a table containing the length of every sequence + [new_seq] : a table containing the batch indices of new sequences + [do_train] : a bool value indicating whether to train or not + if [do_train] is true, these fields must also be present: + [err_input], [err_output] : matrix arrays which store the network err_input and err_output +--]] +function network:mini_batch_init(info) + self.info = info + self:set_input(self.info.input) + self:set_output(self.info.output) + + -- calculate border self.max_length = 0 self.border = {} for i = 1, self.chunk_size do @@ -387,6 +400,7 @@ function network:mini_batch_init(information) table.insert(self.border[chunk], i) end end + -- copy legacy for t = 1 - self.delay, 0 do for i = 1, #self.layers do @@ -402,23 +416,27 @@ function network:mini_batch_init(information) end end end - -- flush border gradient - for t = self.max_length + 1, self.max_length + self.delay do - if t > self.chunk_size then - break - end - for i = 1, #self.layers do - local dim_in, _ = self.layers[i]:get_dim() - for j = 1, #dim_in do - self.err_output[t][i][j]:fill(0) + + if self.info.do_train then + self:set_err_input(self.info.err_input) + self:set_err_output(self.info.err_output) + + -- flush border gradient + for t = self.max_length + 1, self.max_length + self.delay do + if t > self.chunk_size then + break + end + for i = 1, #self.layers do + local dim_in, _ = self.layers[i]:get_dim() + for j = 1, #dim_in do + self.err_output[t][i][j]:fill(0) + end end end end end -function network:propagate(input, output) - self:set_input(input) - self:set_output(output) +function network:propagate() for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then @@ -435,11 +453,7 @@ function network:propagate(input, output) end end -function network:back_propagate(bp_err, next_bp_err, input, output) - self:set_input(input) - self:set_output(output) - self:set_err_input(bp_err) - self:set_err_output(next_bp_err) +function network:back_propagate() for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then @@ -462,10 +476,7 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end end -function network:update(bp_err, input, output) - self:set_input(input) - self:set_output(output) - self:set_err_input(bp_err) +function network:update() for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- cgit v1.2.3-70-g09d2
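For reference, the reworked mini-batch interface introduced in this last commit can be driven as in the minimal sketch below. This is only an illustration, not part of the patch: it assumes a network wrapped around a graph layer as in the deleted nerv/main.lua example (62-dim input, 79-dim output, batch_size = 2, chunk_size = 5), with network and global_conf standing in for that setup; the field names follow the [info] contract documented in network:mini_batch_init above.

-- Build one [info] table per mini-batch and hand it to the network.
-- All buffers are preallocated matrices, as in nn:get_data in lua/network.lua.
local batch, chunk = 2, 5
local input_size, output_size = 62, 79
local mat = global_conf.cumat_type

local info = {
    input = {}, output = {},
    err_input = {}, err_output = {},
    seq_length = {5, 3},  -- length of each sequence in this mini-batch
    new_seq = {2},        -- batch slots that start a new sequence here
    do_train = true,
}
for t = 1, chunk do
    info.input[t] = {mat(batch, input_size)}
    info.output[t] = {mat(batch, output_size)}
    info.err_input[t] = {mat(batch, output_size)}   -- gradient w.r.t. the network output
    info.err_output[t] = {mat(batch, input_size)}   -- gradient w.r.t. the network input
end

network:mini_batch_init(info)  -- binds all buffers and computes the borders
network:propagate()            -- no arguments any more: data comes from info
if info.do_train then
    network:back_propagate()
    network:update()
end

Note that err_input and err_output only need to be supplied when do_train is set, matching the new branch added to mini_batch_init in this commit.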