summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/examples/network_debug/config.lua 6
-rw-r--r-- nerv/init.lua 22
-rw-r--r-- nerv/layer/graph.lua 10
-rw-r--r-- nerv/layer/rnn.lua 22
-rw-r--r-- nerv/nn/network.lua 60
5 files changed, 90 insertions, 30 deletions
diff --git a/nerv/examples/network_debug/config.lua b/nerv/examples/network_debug/config.lua
index 0429e9a..8e68a32 100644
--- a/nerv/examples/network_debug/config.lua
+++ b/nerv/examples/network_debug/config.lua
@@ -1,6 +1,6 @@
function get_global_conf()
local global_conf = {
- lrate = 0.15,
+ lrate = 1.5,
wcost = 1e-5,
momentum = 0,
clip = 5,
@@ -10,8 +10,8 @@ function get_global_conf()
nn_act_default = 0,
hidden_size = 300,
layer_num = 1,
- chunk_size = 15,
- batch_size = 20,
+ chunk_size = 5,
+ batch_size = 200,
max_iter = 35,
param_random = function() return (math.random() / 5 - 0.1) end,
dropout_rate = 0.5,
diff --git a/nerv/init.lua b/nerv/init.lua
index 439a83e..d017f82 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -360,6 +360,28 @@ function table.vector(len, fill)
return v
end
+function table.connect(tbl1, tbl2)
+ local res = {}
+ for i = 1, #tbl1 do
+ table.insert(res, tbl1[i])
+ end
+ for i = 1, #tbl2 do
+ table.insert(res, tbl2[i])
+ end
+ return res
+end
+
+function table.merge(tbl1, tbl2)
+ local res = {}
+ for k, v in pairs(tbl1) do
+ res[k] = v
+ end
+ for k, v in pairs(tbl2) do
+ res[k] = v
+ end
+ return res
+end
+
-- the following lines trigger the initialization of basic modules
nerv.include('matrix/init.lua')
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 68d5f51..ddbc85e 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -65,7 +65,17 @@ function GraphLayer:discover(id, layer_repo)
return ref
end
+local function reverse(connections)
+ for i = 1, #connections do
+ connections[i][3] = connections[i][3] * -1
+ end
+end
+
function GraphLayer:graph_init(layer_repo, connections)
+ if self.lconf.reversed then
+ reverse(connections)
+ end
+
local layers = {}
layers['<input>'] = {
inputs = {},
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 333be9e..aad2b94 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -2,13 +2,17 @@ local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
function RNNLayer:__init(id, global_conf, layer_conf)
nerv.Layer.__init(self, id, global_conf, layer_conf)
- self:check_dim_len(1, 1)
+ self:check_dim_len(-1, 1)
+ if #self.dim_in == 0 then
+ nerv.error('RNN Layer %s has no input', self.id)
+ end
- if layer_conf.activation == nil then
- layer_conf.activation = 'nerv.SigmoidLayer'
+ self.activation = layer_conf.activation
+ if self.activation == nil then
+ self.activation = 'nerv.SigmoidLayer'
end
- local din = layer_conf.dim_in[1]
+ local din = layer_conf.dim_in
local dout = layer_conf.dim_out[1]
local pr = layer_conf.pr
@@ -18,9 +22,9 @@ function RNNLayer:__init(id, global_conf, layer_conf)
local layers = {
['nerv.AffineLayer'] = {
- main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
+ main = {dim_in = table.connect({dout}, din), dim_out = {dout}, pr = pr},
},
- [layer_conf.activation] = {
+ [self.activation] = {
activation = {dim_in = {dout}, dim_out = {dout}},
},
['nerv.DuplicateLayer'] = {
@@ -29,12 +33,14 @@ function RNNLayer:__init(id, global_conf, layer_conf)
}
local connections = {
- {'<input>[1]', 'main[1]', 0},
{'main[1]', 'activation[1]', 0},
{'activation[1]', 'duplicate[1]', 0},
- {'duplicate[1]', 'main[2]', 1},
+ {'duplicate[1]', 'main[1]', 1},
{'duplicate[2]', '<output>[1]', 0},
}
+ for i = 1, #din do
+ table.insert(connections, {'<input>[' .. i .. ']', 'main[' .. (i + 1) .. ']', 0})
+ end
self:add_prefix(layers, connections)
local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 7e2af4e..bb03be4 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -398,10 +398,12 @@ function network:make_initial_store()
end
for d = 1, self.delay do
for t = 1 - d, 0 do
- for i = 1, #self.output_edge[d] do
- local edge = self.output_edge[d][i]
- local id, port = edge[1], edge[2]
- self.legacy[t][id][port] = memory[t][id][port]
+ if t + self.chunk_size >= 1 then
+ for i = 1, #self.output_edge[d] do
+ local edge = self.output_edge[d][i]
+ local id, port = edge[1], edge[2]
+ self.legacy[t][id][port] = memory[t][id][port]
+ end
end
end
end
@@ -486,17 +488,19 @@ function network:mini_batch_init(info)
self.gconf.mask[t]:copy_fromh(tmp)
end
- -- calculate border
+ -- calculate max length
self.max_length = 0
+ for i = 1, self.batch_size do
+ self.max_length = math.max(self.max_length, self.info.seq_length[i])
+ end
+
+ -- calculate border
self.timestamp = self.timestamp + 1
for i = 1, self.batch_size do
- if self.info.seq_length[i] > self.max_length then
- self.max_length = self.info.seq_length[i]
- end
local border = self.info.seq_length[i]
for d = 1, self.delay do
for t = border + 1, border + d do
- if t > self.chunk_size then
+ if t > self.max_length then
break
end
for j = 1, #self.output_edge[-d] do
@@ -532,23 +536,41 @@ function network:mini_batch_init(info)
end
end
+ -- flush border gradient
+ if self.info.do_train then
+ local border = self.max_length
+ for d = 1, self.delay do
+ for t = border + 1, border + d do
+ if t > self.chunk_size then
+ break
+ end
+ for j = 1, #self.input_edge[d] do
+ local edge = self.input_edge[d][j]
+ local id, port = edge[1], edge[2]
+ self.err_output[t][id][port]:fill(0)
+ end
+ end
+ end
+ end
+
-- copy legacy
for d = 1, self.delay do
for t = 1 - d, 0 do
- for i = 1, #self.output_edge[d] do
- local edge = self.output_edge[d][i]
- local id, port = edge[1], edge[2]
- if t + self.chunk_size >= 1 and self.output_conn[id][port][1] ~= 0 then
- self.legacy[t][id][port]:copy_from(self.output[t + self.chunk_size][id][port])
- end
- for j = 1, #self.info.new_seq do
- local batch = self.info.new_seq[j]
- self.legacy[t][id][port][batch - 1]:fill(self.nn_act_default)
+ if t + self.chunk_size >= 1 then
+ for i = 1, #self.output_edge[d] do
+ local edge = self.output_edge[d][i]
+ local id, port = edge[1], edge[2]
+ if self.output_conn[id][port][1] ~= 0 then
+ self.legacy[t][id][port]:copy_from(self.output[t + self.chunk_size][id][port])
+ end
+ for j = 1, #self.info.new_seq do
+ local batch = self.info.new_seq[j]
+ self.legacy[t][id][port][batch - 1]:fill(self.nn_act_default)
+ end
end
end
end
end
-
end
function network:propagate()