From f1656ade78ac71d4cd84f1f7a539d1845b137589 Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Thu, 17 Mar 2016 10:46:43 +0800
Subject: RNN support multiple input & fix network flush bug

---
 nerv/examples/network_debug/config.lua |  6 ++--
 nerv/init.lua                          | 22 +++++++++++++
 nerv/layer/graph.lua                   | 10 ++++++
 nerv/layer/rnn.lua                     | 22 ++++++++-----
 nerv/nn/network.lua                    | 60 +++++++++++++++++++++++----------
 5 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/nerv/examples/network_debug/config.lua b/nerv/examples/network_debug/config.lua
index 0429e9a..8e68a32 100644
--- a/nerv/examples/network_debug/config.lua
+++ b/nerv/examples/network_debug/config.lua
@@ -1,6 +1,6 @@
 function get_global_conf()
     local global_conf = {
-        lrate = 0.15,
+        lrate = 1.5,
         wcost = 1e-5,
         momentum = 0,
         clip = 5,
@@ -10,8 +10,8 @@ function get_global_conf()
         nn_act_default = 0,
         hidden_size = 300,
         layer_num = 1,
-        chunk_size = 15,
-        batch_size = 20,
+        chunk_size = 5,
+        batch_size = 200,
         max_iter = 35,
         param_random = function() return (math.random() / 5 - 0.1) end,
         dropout_rate = 0.5,
diff --git a/nerv/init.lua b/nerv/init.lua
index ff944b8..551a9f9 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -356,6 +356,28 @@ function table.vector(len, fill)
     return v
 end
 
+function table.connect(tbl1, tbl2)
+    local res = {}
+    for i = 1, #tbl1 do
+        table.insert(res, tbl1[i])
+    end
+    for i = 1, #tbl2 do
+        table.insert(res, tbl2[i])
+    end
+    return res
+end
+
+function table.merge(tbl1, tbl2)
+    local res = {}
+    for k, v in pairs(tbl1) do
+        res[k] = v
+    end
+    for k, v in pairs(tbl2) do
+        res[k] = v
+    end
+    return res
+end
+
 
 -- the following lines trigger the initialization of basic modules
 nerv.include('matrix/init.lua')
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 68d5f51..ddbc85e 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -65,7 +65,17 @@ function GraphLayer:discover(id, layer_repo)
     return ref
 end
 
+local function reverse(connections)
+    for i = 1, #connections do
+        connections[i][3] = connections[i][3] * -1
+    end
+end
+
 function GraphLayer:graph_init(layer_repo, connections)
+    if self.lconf.reversed then
+        reverse(connections)
+    end
+
     local layers = {}
     layers[''] = {
         inputs = {},
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 0b5ccaa..aad2b94 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -2,13 +2,17 @@ local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
 
 function RNNLayer:__init(id, global_conf, layer_conf)
     nerv.Layer.__init(self, id, global_conf, layer_conf)
-    self:check_dim_len(1, 1)
+    self:check_dim_len(-1, 1)
+    if #self.dim_in == 0 then
+        nerv.error('RNN Layer %s has no input', self.id)
+    end
 
-    if layer_conf.activation == nil then
-        layer_conf.activation = 'nerv.SigmoidLayer'
+    self.activation = layer_conf.activation
+    if self.activation == nil then
+        self.activation = 'nerv.SigmoidLayer'
     end
 
-    local din = layer_conf.dim_in[1]
+    local din = layer_conf.dim_in
     local dout = layer_conf.dim_out[1]
 
     local pr = layer_conf.pr
@@ -18,9 +22,9 @@
 
     local layers = {
         ['nerv.AffineLayer'] = {
-            main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
+            main = {dim_in = table.connect({dout}, din), dim_out = {dout}, pr = pr},
         },
-        [layers.activation] = {
+        [self.activation] = {
             activation = {dim_in = {dout}, dim_out = {dout}},
         },
         ['nerv.DuplicateLayer'] = {
@@ -29,12 +33,14 @@
     }
 
     local connections = {
-        {'<input>[1]', 'main[1]', 0},
         {'main[1]', 'activation[1]', 0},
         {'activation[1]', 'duplicate[1]', 0},
-        {'duplicate[1]', 'main[2]', 1},
+        {'duplicate[1]', 'main[1]', 1},
         {'duplicate[2]', '<output>[1]', 0},
     }
+    for i = 1, #din do
+        table.insert(connections, {'<input>[' .. i .. ']', 'main[' .. (i + 1) .. ']', 0})
+    end
 
     self:add_prefix(layers, connections)
     local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 6f7fe10..cd80b1e 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -398,10 +398,12 @@ function network:make_initial_store()
     end
     for d = 1, self.delay do
         for t = 1 - d, 0 do
-            for i = 1, #self.output_edge[d] do
-                local edge = self.output_edge[d][i]
-                local id, port = edge[1], edge[2]
-                self.legacy[t][id][port] = memory[t][id][port]
+            if t + self.chunk_size >= 1 then
+                for i = 1, #self.output_edge[d] do
+                    local edge = self.output_edge[d][i]
+                    local id, port = edge[1], edge[2]
+                    self.legacy[t][id][port] = memory[t][id][port]
+                end
             end
         end
     end
@@ -486,17 +488,19 @@ function network:mini_batch_init(info)
             self.gconf.mask[t]:copy_fromh(tmp)
         end
 
-    -- calculate border
+    -- calculate max length
     self.max_length = 0
+    for i = 1, self.batch_size do
+        self.max_length = math.max(self.max_length, self.info.seq_length[i])
+    end
+
+    -- calculate border
     self.timestamp = self.timestamp + 1
     for i = 1, self.batch_size do
-        if self.info.seq_length[i] > self.max_length then
-            self.max_length = self.info.seq_length[i]
-        end
         local border = self.info.seq_length[i]
         for d = 1, self.delay do
             for t = border + 1, border + d do
-                if t > self.chunk_size then
+                if t > self.max_length then
                     break
                 end
                 for j = 1, #self.output_edge[-d] do
@@ -532,23 +536,41 @@
             end
         end
     end
+    -- flush border gradient
+    if self.info.do_train then
+        local border = self.max_length
+        for d = 1, self.delay do
+            for t = border + 1, border + d do
+                if t > self.chunk_size then
+                    break
+                end
+                for j = 1, #self.input_edge[d] do
+                    local edge = self.input_edge[d][j]
+                    local id, port = edge[1], edge[2]
+                    self.err_output[t][id][port]:fill(0)
+                end
+            end
+        end
+    end
+
     -- copy legacy
     for d = 1, self.delay do
         for t = 1 - d, 0 do
-            for i = 1, #self.output_edge[d] do
-                local edge = self.output_edge[d][i]
-                local id, port = edge[1], edge[2]
-                if t + self.chunk_size >= 1 and self.output_conn[id][port][1] ~= 0 then
-                    self.legacy[t][id][port]:copy_from(self.output[t + self.chunk_size][id][port])
-                end
-                for j = 1, #self.info.new_seq do
-                    local batch = self.info.new_seq[j]
-                    self.legacy[t][id][port][batch - 1]:fill(self.nn_act_default)
+            if t + self.chunk_size >= 1 then
+                for i = 1, #self.output_edge[d] do
+                    local edge = self.output_edge[d][i]
+                    local id, port = edge[1], edge[2]
+                    if self.output_conn[id][port][1] ~= 0 then
+                        self.legacy[t][id][port]:copy_from(self.output[t + self.chunk_size][id][port])
+                    end
+                    for j = 1, #self.info.new_seq do
+                        local batch = self.info.new_seq[j]
+                        self.legacy[t][id][port][batch - 1]:fill(self.nn_act_default)
+                    end
                 end
             end
         end
     end
-
 end
 
 function network:propagate()
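
A note on the new table helpers in nerv/init.lua: table.connect concatenates the array parts of its two arguments in order, while table.merge builds a key-wise union in which keys from the second table overwrite those from the first. The RNN layer uses table.connect to put the recurrent port in front of the user-supplied input dimensions. A minimal stand-alone sketch (plain Lua, no NERV modules; the sample dimensions are made up for illustration):

    -- same definition as the one added to nerv/init.lua
    function table.connect(tbl1, tbl2)
        local res = {}
        for i = 1, #tbl1 do
            table.insert(res, tbl1[i])
        end
        for i = 1, #tbl2 do
            table.insert(res, tbl2[i])
        end
        return res
    end

    -- hypothetical sizes: hidden (recurrent) width 300, two inputs of width 100 and 50
    local dims = table.connect({300}, {100, 50})
    print(table.concat(dims, ', '))   -- prints: 300, 100, 50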
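
On the multiple-input wiring in nerv/layer/rnn.lua: port 1 of the internal 'main' affine layer now always carries the recurrent activation (fed back through 'duplicate' with a delay of 1), and the i-th external input of the graph layer is connected to main[i + 1] by the new loop. The sketch below just replays that loop outside of NERV to show the connection list it builds for a hypothetical two-input layer (dim_in = {100, 50}); the layer ids and the <input>/<output> port notation follow the nerv.GraphLayer conventions assumed here:

    -- hypothetical configuration: dim_in = {100, 50}
    local din = {100, 50}
    local connections = {
        {'main[1]', 'activation[1]', 0},
        {'activation[1]', 'duplicate[1]', 0},
        {'duplicate[1]', 'main[1]', 1},      -- recurrent edge, delay 1
        {'duplicate[2]', '<output>[1]', 0},
    }
    for i = 1, #din do
        -- external input i feeds affine port i + 1; port 1 is reserved for recurrence
        table.insert(connections, {'<input>[' .. i .. ']', 'main[' .. (i + 1) .. ']', 0})
    end
    for _, c in ipairs(connections) do
        print(string.format('%-16s -> %-12s (delay %d)', c[1], c[2], c[3]))
    end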
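
On the nerv/nn/network.lua changes: a legacy slot at a non-positive timestep t mirrors the previous chunk's output at t + chunk_size, so both make_initial_store and mini_batch_init now guard that work with t + chunk_size >= 1; slots with no counterpart inside a chunk (which can only happen when a connection's delay exceeds chunk_size) are skipped, and the new "flush border gradient" block zeroes err_output for the timesteps just past the longest sequence in the mini-batch during training. A toy loop showing which (delay, t) pairs pass the guard, with made-up sizes chunk_size = 3 and a maximum delay of 5:

    local chunk_size, max_delay = 3, 5
    for d = 1, max_delay do
        for t = 1 - d, 0 do
            if t + chunk_size >= 1 then
                print(string.format('delay %d: legacy[%2d] <- output[%d]', d, t, t + chunk_size))
            else
                print(string.format('delay %d: legacy[%2d] skipped (outside the %d-step chunk)', d, t, chunk_size))
            end
        end
    end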