about | summary | refs | log | tree | commit | diff
path: root/nerv/nn/network.lua
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/nn/network.lua')
-rw-r--r-- nerv/nn/network.lua 285
1 files changed, 199 insertions, 86 deletions
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 2cb83ce..6f7fe10 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -16,6 +16,7 @@ function network:__init(id, global_conf, network_conf)
if self.nn_act_default == nil then
self.nn_act_default = 0
end
+
self.layers = {}
self.input_conn = {}
self.output_conn = {}
@@ -26,7 +27,17 @@ function network:__init(id, global_conf, network_conf)
if self.input_conn[id][port] ~= nil then
nerv.error('duplicate edge')
end
- self.input_conn[id][port] = {0, i, time}
+ if nerv.is_type(self.layers[id], 'nerv.DuplicateLayer') then
+ local tmp = nerv.IdentityLayer('', self.gconf, {dim_in = {self.dim_in[i]}, dim_out = {self.dim_in[i]}})
+ table.insert(self.layers, tmp)
+ local new_id = #self.layers
+ self.input_conn[new_id] = {{0, i, time}}
+ self.output_conn[new_id] = {{id, port, 0}}
+ self.input_conn[id][port] = {new_id, 1, 0}
+ self.socket.inputs[i] = {new_id, 1, time}
+ else
+ self.input_conn[id][port] = {0, i, time}
+ end
end
for i = 1, #self.dim_out do
local edge = self.socket.outputs[i]
@@ -34,18 +45,53 @@ function network:__init(id, global_conf, network_conf)
if self.output_conn[id][port] ~= nil then
nerv.error('duplicate edge')
end
- self.output_conn[id][port] = {0, i, time}
+ if nerv.is_type(self.layers[id], 'nerv.DuplicateLayer') then
+ local tmp = nerv.IdentityLayer('', self.gconf, {dim_in = {self.dim_out[i]}, dim_out = {self.dim_out[i]}})
+ table.insert(self.layers, tmp)
+ local new_id = #self.layers
+ self.input_conn[new_id] = {{id, port, 0}}
+ self.output_conn[new_id] = {{0, i, time}}
+ self.output_conn[id][port] = {new_id, 1, 0}
+ self.socket.outputs[i] = {new_id, 1, time}
+ else
+ self.output_conn[id][port] = {0, i, time}
+ end
end
+
self.delay = 0
for i = 1, #self.layers do
local dim_in, _ = self.layers[i]:get_dim()
for j = 1, #dim_in do
+ if self.input_conn[i][j] == nil then
+ nerv.error('dangling input')
+ end
local time = self.input_conn[i][j][3]
if math.abs(time) > self.delay then
self.delay = math.abs(time)
end
end
end
+
+ self.input_edge = {}
+ self.output_edge = {}
+ for t = -self.delay, self.delay do
+ self.input_edge[t] = {}
+ self.output_edge[t] = {}
+ end
+ for i = 1, #self.layers do
+ local dim_in, dim_out = self.layers[i]:get_dim()
+ for j = 1, #dim_in do
+ local time = self.input_conn[i][j][3]
+ table.insert(self.input_edge[time], {i, j})
+ end
+ for j = 1, #dim_out do
+ if self.output_conn[i][j] == nil then
+ nerv.error('dangling output')
+ end
+ local time = self.output_conn[i][j][3]
+ table.insert(self.output_edge[time], {i, j})
+ end
+ end
end
function network:compile(layer)
@@ -112,11 +158,22 @@ function network:init(batch_size, chunk_size)
self:make_initial_store()
collectgarbage('collect')
+
+ self.flush = {}
+ self.gconf.mask = {}
+ for t = 1, self.chunk_size do
+ self.flush[t] = {}
+ self.gconf.mask[t] = self.mat_type(self.batch_size, 1)
+ end
end
function network:epoch_init()
+ self.timestamp = 0
for i = 1, #self.layers do
self.layers[i]:init(self.batch_size, self.chunk_size)
+ for t = 1, self.chunk_size do
+ self.flush[t][i] = {timestamp = 0, input = {}, output = {}}
+ end
end
end
@@ -134,12 +191,10 @@ function network:topsort()
for i = 1, #self.layers do
local _, dim_out = self.layers[i]:get_dim()
for j = 1, #dim_out do
- if self.output_conn[i][j] ~= nil then
- local edge = self.output_conn[i][j]
- local id, time = edge[1], edge[3] + t
- if time >= 1 and time <= self.chunk_size and id ~= 0 then
- degree[time][id] = degree[time][id] + 1
- end
+ local edge = self.output_conn[i][j]
+ local id, time = edge[1], edge[3] + t
+ if time >= 1 and time <= self.chunk_size and id ~= 0 then
+ degree[time][id] = degree[time][id] + 1
end
end
end
@@ -161,15 +216,13 @@ function network:topsort()
l = l + 1
local _, dim_out = self.layers[i]:get_dim()
for j = 1, #dim_out do
- if self.output_conn[i][j] ~= nil then
- local edge = self.output_conn[i][j]
- local id, time = edge[1], edge[3] + t
- if time >= 1 and time <= self.chunk_size and id ~= 0 then
- degree[time][id] = degree[time][id] - 1
- if degree[time][id] == 0 then
- r = r + 1
- self.queue[r] = {chunk = time, id = id}
- end
+ local edge = self.output_conn[i][j]
+ local id, time = edge[1], edge[3] + t
+ if time >= 1 and time <= self.chunk_size and id ~= 0 then
+ degree[time][id] = degree[time][id] - 1
+ if degree[time][id] == 0 then
+ r = r + 1
+ self.queue[r] = {chunk = time, id = id}
end
end
end
@@ -197,22 +250,26 @@ function network:make_initial_store()
err_memory[t][i][j] = self.mat_type(self.batch_size, dim_in[j])
err_memory[t][i][j]:fill(0)
end
- for j = 1, #dim_out do
- memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j])
- memory[t][i][j]:fill(self.nn_act_default)
+ if t < 1 or t > self.chunk_size or not nerv.is_type(self.layers[i], 'nerv.DuplicateLayer') then
+ for j = 1, #dim_out do
+ memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j])
+ memory[t][i][j]:fill(self.nn_act_default)
+ end
end
end
- -- memory[t][0] stores network input
- memory[t][0] = {}
- for j = 1, #self.dim_in do
- memory[t][0][j] = self.mat_type(self.batch_size, self.dim_in[j])
- memory[t][0][j]:fill(self.nn_act_default)
- end
- -- err_memory[t][0] stores network err_input
- err_memory[t][0] = {}
- for j = 1, #self.dim_out do
- err_memory[t][0][j] = self.mat_type(self.batch_size, self.dim_out[j])
- err_memory[t][0][j]:fill(0)
+ if t < 1 or t > self.chunk_size then
+ -- memory[t][0] stores network input
+ memory[t][0] = {}
+ for j = 1, #self.dim_in do
+ memory[t][0][j] = self.mat_type(self.batch_size, self.dim_in[j])
+ memory[t][0][j]:fill(self.nn_act_default)
+ end
+ -- err_memory[t][0] stores network err_input
+ err_memory[t][0] = {}
+ for j = 1, #self.dim_out do
+ err_memory[t][0][j] = self.mat_type(self.batch_size, self.dim_out[j])
+ err_memory[t][0][j]:fill(0)
+ end
end
end
@@ -255,6 +312,28 @@ function network:make_initial_store()
end
end
+ -- reference copy for duplicate layer
+ for i = 1, #self.queue do
+ local t, id = self.queue[i].chunk, self.queue[i].id
+ if nerv.is_type(self.layers[id], 'nerv.DuplicateLayer') then
+ local _, dim_out = self.layers[id]:get_dim()
+ for j = 1, #dim_out do
+ if self.output[t][id][j] ~= nil then
+ nerv.error('duplicate output reference not nil')
+ end
+ self.output[t][id][j] = self.input[t][id][1]
+ local edge = self.output_conn[id][j]
+ local to, port, time = edge[1], edge[2], edge[3] + t
+ if time >= 1 and time <= self.chunk_size then
+ if self.input[time][to][port] ~= nil then
+ nerv.error('duplicate input reference not nil')
+ end
+ self.input[time][to][port] = self.output[t][id][j]
+ end
+ end
+ end
+ end
+
-- check dangling reference
for t = 1, self.chunk_size do
for i = 1, #self.dim_in do
@@ -291,6 +370,7 @@ function network:make_initial_store()
local dim_in, dim_out = self.layers[i]:get_dim()
for j = 1, #dim_in do
if self.input[t][i][j] == nil then
+ print(t,i,j,self.layers[i].id)
nerv.error('input reference dangling')
end
if self.err_output[t][i][j] == nil then
@@ -314,9 +394,14 @@ function network:make_initial_store()
self.legacy[t] = {}
for i = 1, #self.layers do
self.legacy[t][i] = {}
- local _, dim_out = self.layers[i]:get_dim()
- for j = 1, #dim_out do
- self.legacy[t][i][j] = memory[t][i][j]
+ end
+ end
+ for d = 1, self.delay do
+ for t = 1 - d, 0 do
+ for i = 1, #self.output_edge[d] do
+ local edge = self.output_edge[d][i]
+ local id, port = edge[1], edge[2]
+ self.legacy[t][id][port] = memory[t][id][port]
end
end
end
@@ -383,59 +468,87 @@ function network:mini_batch_init(info)
self.info = info
self:set_input(self.info.input)
self:set_output(self.info.output)
+ if self.info.do_train then
+ self:set_err_input(self.info.err_input)
+ self:set_err_output(self.info.err_output)
+ end
+
+ -- calculate mask
+ for t = 1, self.chunk_size do
+ local tmp = self.gconf.mmat_type(self.batch_size, 1)
+ for i = 1, self.batch_size do
+ if t <= self.info.seq_length[i] then
+ tmp[i - 1][0] = 1
+ else
+ tmp[i - 1][0] = 0
+ end
+ end
+ self.gconf.mask[t]:copy_fromh(tmp)
+ end
-- calculate border
self.max_length = 0
- self.border = {}
- for i = 1, self.chunk_size do
- self.border[i] = {}
- end
+ self.timestamp = self.timestamp + 1
for i = 1, self.batch_size do
if self.info.seq_length[i] > self.max_length then
self.max_length = self.info.seq_length[i]
end
- for t = 1, self.delay do
- local chunk = self.info.seq_length[i] + t
- if chunk > self.chunk_size then
- break
+ local border = self.info.seq_length[i]
+ for d = 1, self.delay do
+ for t = border + 1, border + d do
+ if t > self.chunk_size then
+ break
+ end
+ for j = 1, #self.output_edge[-d] do
+ local edge = self.output_edge[-d][j]
+ local id, port = edge[1], edge[2]
+ local flush = self.flush[t][id]
+ if flush.timestamp ~= self.timestamp then
+ flush.timestamp = self.timestamp
+ flush.input = {}
+ flush.output = {}
+ end
+ table.insert(flush.output, {port, i})
+ end
+ end
+ if self.info.do_train then
+ for t = border, border - d + 1, -1 do
+ if t < 1 then
+ break
+ end
+ for j = 1, #self.input_edge[-d] do
+ local edge = self.input_edge[-d][j]
+ local id, port = edge[1], edge[2]
+ local flush = self.flush[t][id]
+ if flush.timestamp ~= self.timestamp then
+ flush.timestamp = self.timestamp
+ flush.input = {}
+ flush.output = {}
+ end
+ table.insert(flush.input, {port, i})
+ end
+ end
end
- table.insert(self.border[chunk], i)
end
end
-- copy legacy
- for t = 1 - self.delay, 0 do
- for i = 1, #self.layers do
- local _, dim_out = self.layers[i]:get_dim()
- for j = 1, #dim_out do
- if t + self.chunk_size >= 1 and self.output_conn[i][j][1] ~= 0 then
- self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j])
+ for d = 1, self.delay do
+ for t = 1 - d, 0 do
+ for i = 1, #self.output_edge[d] do
+ local edge = self.output_edge[d][i]
+ local id, port = edge[1], edge[2]
+ if t + self.chunk_size >= 1 and self.output_conn[id][port][1] ~= 0 then
+ self.legacy[t][id][port]:copy_from(self.output[t + self.chunk_size][id][port])
end
- for k = 1, #self.info.new_seq do
- local batch = self.info.new_seq[k]
- self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default)
+ for j = 1, #self.info.new_seq do
+ local batch = self.info.new_seq[j]
+ self.legacy[t][id][port][batch - 1]:fill(self.nn_act_default)
end
end
end
end
- if self.info.do_train then
- self:set_err_input(self.info.err_input)
- self:set_err_output(self.info.err_output)
-
- -- flush border gradient
- for t = self.max_length + 1, self.max_length + self.delay do
- if t > self.chunk_size then
- break
- end
- for i = 1, #self.layers do
- local dim_in, _ = self.layers[i]:get_dim()
- for j = 1, #dim_in do
- self.err_output[t][i][j]:fill(0)
- end
- end
- end
- end
end
function network:propagate()
@@ -445,11 +558,11 @@ function network:propagate()
self.layers[id]:propagate(self.input[t][id], self.output[t][id], t)
end
-- flush border activation
- for j = 1, #self.border[t] do
- local batch = self.border[t][j]
- local _, dim_out = self.layers[id]:get_dim()
- for k = 1, #dim_out do
- self.output[t][id][k][batch - 1]:fill(self.nn_act_default)
+ if self.flush[t][id].timestamp == self.timestamp then
+ for j = 1, #self.flush[t][id].output do
+ local border = self.flush[t][id].output[j]
+ local port, batch = border[1], border[2]
+ self.output[t][id][port][batch - 1]:fill(self.nn_act_default)
end
end
end
@@ -459,15 +572,8 @@ function network:back_propagate()
for i = #self.queue, 1, -1 do
local t, id = self.queue[i].chunk, self.queue[i].id
if t <= self.max_length then
- -- flush border gradient
- for j = 1, #self.border[t] do
- local batch = self.border[t][j]
- local _, dim_out = self.layers[id]:get_dim()
- for k = 1, #dim_out do
- self.err_input[t][id][k][batch - 1]:fill(0)
- end
- end
self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t)
+ -- gradient clip
if self.clip ~= nil then
local dim_in, _ = self.layers[id]:get_dim()
for j = 1, #dim_in do
@@ -475,14 +581,21 @@ function network:back_propagate()
end
end
end
+ -- flush border gradient
+ if self.flush[t][id].timestamp == self.timestamp then
+ for j = 1, #self.flush[t][id].input do
+ local border = self.flush[t][id].input[j]
+ local port, batch = border[1], border[2]
+ self.err_output[t][id][port][batch - 1]:fill(0)
+ end
+ end
end
end
function network:update()
- for i = 1, #self.queue do
- local t, id = self.queue[i].chunk, self.queue[i].id
- if t <= self.max_length then
- self.layers[id]:update(self.err_input[t][id], self.input[t][id], self.output[t][id], t)
+ for t = 1, self.max_length do
+ for i = 1, #self.layers do
+ self.layers[i]:update(self.err_input[t][i], self.input[t][i], self.output[t][i], t)
end
end
end