-rw-r--r--   nerv/Makefile             |    2
-rw-r--r--   nerv/layer/duplicate.lua  |   40
-rw-r--r--   nerv/layer/graph.lua      |    3
-rw-r--r--   nerv/layer/init.lua       |    1
-rw-r--r--   nerv/layer/rnn.lua        |    8
-rw-r--r--   nerv/main.lua             |   18
-rw-r--r--   nerv/nn/network.lua       |  324
7 files changed, 367 insertions, 29 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index c9c3e42..0a2aa86 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
         layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
         layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \
         layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \
-        layer/graph.lua layer/rnn.lua \
+        layer/graph.lua layer/rnn.lua layer/duplicate.lua\
         nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \
         io/sgd_buffer.lua \
         tnn/init.lua tnn/sutil.lua tnn/tnn.lua
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
new file mode 100644
index 0000000..58758e8
--- /dev/null
+++ b/nerv/layer/duplicate.lua
@@ -0,0 +1,40 @@
+local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
+
+function DuplicateLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+    self:check_dim_len(1, -1)
+    if #self.dim_out < 1 then
+        nerv.error('no output specified')
+    end
+    for i = 1, #self.dim_out do
+        if self.dim_out[i] ~= self.dim_in[1] then
+            nerv.error('mismatching dimensions of outputs')
+        end
+    end
+end
+
+function DuplicateLayer:init(batch_size)
+end
+
+function DuplicateLayer:batch_resize(batch_size)
+end
+
+function DuplicateLayer:update(bp_err, input, output)
+end
+
+function DuplicateLayer:propagate(input, output)
+    for i = 1, #self.dim_out do
+        output[i]:copy_from(input[1])
+        -- FIXME: use reference copy to speed up
+    end
+end
+
+function DuplicateLayer:back_propagate(bp_err, next_bp_err, input, output)
+    next_bp_err:copy_from(bp_err[1])
+    for i = 2, #self.dim_out do
+        next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0)
+    end
+end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 36a9672..d72d849 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -64,6 +64,9 @@ function GraphLayer:graph_init(layer_repo, connections)
         local id_to, port_to = parse_id(to)
         local ref_from = self:discover(id_from, layer_repo)
         local ref_to = self:discover(id_to, layer_repo)
+        if ref_from.outputs[port_from] ~= nil then
+            nerv.error('%s has already been attached', from)
+        end
         if ref_to.inputs[port_to] ~= nil then
             nerv.error('%s has already been attached', to)
         end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 5e3395c..6f26d4d 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -125,6 +125,7 @@ nerv.include('lstm_gate.lua')
 nerv.include('dropout.lua')
 nerv.include('gru.lua')
 nerv.include('rnn.lua')
+nerv.include('duplicate.lua')
 
 -- The following lines are for backward compatibility, and will be removed in
 -- the future. The use of these names are deprecated.
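
An aside on how the new layer is meant to be wired (the rnn.lua and main.lua hunks below show the actual usage): GraphLayer:graph_init now rejects a second edge leaving an already-attached output port, so a port that needs several consumers is routed through a DuplicateLayer, whose propagate copies its single input to every output and whose back_propagate sums the incoming errors. A minimal sketch, assuming hypothetical layer ids ('src', 'sink_a', 'sink_b') and a made-up dimension of 10:

    -- Sketch: fan 'src[1]' out to two consumers through a DuplicateLayer
    -- instead of attaching 'src[1]' to two edges directly, which the new
    -- check in graph_init would reject.
    local layers = {
        ['nerv.DuplicateLayer'] = {
            dup = {dim_in = {10}, dim_out = {10, 10}},
        },
        -- declarations for 'src', 'sink_a', 'sink_b' elided
    }
    local connections = {
        {'src[1]', 'dup[1]', 0},
        {'dup[1]', 'sink_a[1]', 0},
        {'dup[2]', 'sink_b[1]', 0},
    }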
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 8816891..806ac58 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -22,6 +22,9 @@ function RNNLayer:__init(id, global_conf, layer_conf)
         ['nerv.SigmoidLayer'] = {
             sigmoid = {dim_in = {dout}, dim_out = {dout}},
         },
+        ['nerv.DuplicateLayer'] = {
+            dup = {dim_in = {dout}, dim_out = {dout, dout}},
+        }
     }
 
     local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
@@ -29,8 +32,9 @@ function RNNLayer:__init(id, global_conf, layer_conf)
     local connections = {
         {'<input>[1]', 'main[1]', 0},
         {'main[1]', 'sigmoid[1]', 0},
-        {'sigmoid[1]', 'main[2]', 1},
-        {'sigmoid[1]', '<output>[1]', 0},
+        {'sigmoid[1]', 'dup[1]', 0},
+        {'dup[1]', 'main[2]', 1},
+        {'dup[2]', '<output>[1]', 0},
     }
 
     self:graph_init(layer_repo, connections)
diff --git a/nerv/main.lua b/nerv/main.lua
index 0633e87..5cb7d07 100644
--- a/nerv/main.lua
+++ b/nerv/main.lua
@@ -17,6 +17,12 @@ local layer_repo = nerv.LayerRepo(
         ['nerv.SigmoidLayer'] = {
             sigmoid = {dim_in = {23}, dim_out = {23}},
         },
+        ['nerv.SoftmaxLayer'] = {
+            softmax = {dim_in = {79}, dim_out = {79}},
+        },
+        ['nerv.DuplicateLayer'] = {
+            dup = {dim_in = {79}, dim_out = {79, 79}},
+        },
     }, nerv.ParamRepo(), global_conf)
 
 local connections = {
@@ -24,10 +30,14 @@ local connections = {
     {'input[1]', 'sigmoid[1]', 0},
     {'sigmoid[1]', 'rnn[1]', 0},
     {'rnn[1]', 'output[1]', 0},
-    {'output[1]', 'output[2]', 1},
-    {'output[1]', '<output>[1]', 0},
+    {'output[1]', 'dup[1]', 0},
+    {'dup[1]', 'output[2]', -1},
+    {'dup[2]', 'softmax[1]', 0},
+    {'softmax[1]', '<output>[1]', 0},
 }
 
-local graph = nerv.GraphLayer('network', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections})
+local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections})
+
+local network = nerv.Network('network', global_conf, {network = graph})
 
-local network = nerv.Network(graph)
+network:init(2, 5)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 6cee08b..01290e7 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -1,15 +1,47 @@
 local network = nerv.class('nerv.Network')
 
-function network:__init(graph)
+function network:__init(id, global_conf, network_conf)
+    self.id = id
+    self.dim_in = network_conf.network.dim_in
+    self.dim_out = network_conf.network.dim_out
+    self.gconf = global_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+    else
+        self.mat_type = self.gconf.cumat_type
+    end
+    self.clip = network_conf.clip
+    self.nn_act_default = network_conf.nn_act_default
+    if self.nn_act_default == nil then
+        self.nn_act_default = 0
+    end
     self.layers = {}
-    self.socket = self:compile(graph)
+    self.input_conn = {}
+    self.output_conn = {}
+    self.socket = self:compile(network_conf.network)
+    for i = 1, #self.dim_in do
+        local edge = self.socket.inputs[i]
+        local id, port, time = edge[1], edge[2], edge[3]
+        if self.input_conn[id][port] ~= nil then
+            nerv.error('duplicate edge')
+        end
+        self.input_conn[id][port] = {0, i, time}
+    end
+    for i = 1, #self.dim_out do
+        local edge = self.socket.outputs[i]
+        local id, port, time = edge[1], edge[2], edge[3]
+        if self.output_conn[id][port] ~= nil then
+            nerv.error('duplicate edge')
+        end
+        self.output_conn[id][port] = {0, i, time}
+    end
+    self.delay = 0
     for i = 1, #self.layers do
-        print(self.layers[i].layer.id)
-        local _, dim_out = self.layers[i].layer:get_dim()
-        for j = 1, #dim_out do
-            for k = 1, #self.layers[i].connections[j] do
-                local connections = self.layers[i].connections[j][k]
-                print(i, connections[1], connections[2], connections[3])
+        local dim_in, _ = self.layers[i]:get_dim()
+        for j = 1, #dim_in do
+            local time = self.input_conn[i][j][3]
+            if math.abs(time) > self.delay then
+                self.delay = math.abs(time)
             end
         end
     end
@@ -18,15 +50,16 @@ end
 function network:compile(layer)
     local socket = {inputs = {}, outputs = {}}
     if not nerv.is_type(layer, 'nerv.GraphLayer') then
-        table.insert(self.layers, {layer = layer, connections = {}})
+        table.insert(self.layers, layer)
         local id = #self.layers
+        self.input_conn[id] = {}
+        self.output_conn[id] = {}
         local dim_in, dim_out = layer:get_dim()
         for i = 1, #dim_in do
-            socket.inputs[i] = {{id, i, 0}}
+            socket.inputs[i] = {id, i, 0}
         end
         for i = 1, #dim_out do
             socket.outputs[i] = {id, i, 0}
-            self.layers[id].connections[i] = {}
         end
     else
         local sublayer_socket = {}
@@ -35,34 +68,281 @@ function network:compile(layer)
                 sublayer_socket[sublayer.id] = self:compile(sublayer.layer)
             end
         end
-        local dim_in, _ = layer:get_dim()
-        for i = 1, #dim_in do
-            socket.inputs[i] = {}
-        end
         for _, edge in pairs(layer.connections) do
             -- id = 0 means <input> or <output>
             local id_from, port_from = edge[1], edge[2]
             local id_to, port_to = edge[3], edge[4]
             local time = edge[5]
             if id_from == 0 then
-                for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do
-                    local id, port, t = input[1], input[2], input[3] + time
-                    table.insert(socket.inputs[port_from], {id, port, t})
+                if socket.inputs[port_from] ~= nil then
+                    nerv.error('duplicate input socket')
                 end
+                local input = sublayer_socket[id_to].inputs[port_to]
+                local id, port, t = input[1], input[2], input[3] + time
+                socket.inputs[port_from] = {id, port, t}
             else
                 local output = sublayer_socket[id_from].outputs[port_from]
                 local id, port, t = output[1], output[2], output[3] + time
                 if id_to == 0 then
+                    if socket.outputs[port_to] ~= nil then
+                        nerv.error('duplicate output socket')
+                    end
                     socket.outputs[port_to] = {id, port, t}
                 else
-                    local connections = self.layers[id].connections[port]
-                    for _, input in pairs(sublayer_socket[id_to].inputs[port_to]) do
-                        local id1, port1, t1 = input[1], input[2], input[3]
-                        table.insert(connections, {id1, port1, t + t1})
+                    local input = sublayer_socket[id_to].inputs[port_to]
+                    local id1, port1, t1 = input[1], input[2], input[3]
+                    if self.input_conn[id1][port1] ~= nil or self.output_conn[id][port] ~= nil then
+                        nerv.error('duplicate edge')
                     end
+                    self.input_conn[id1][port1] = {id, port, t + t1}
+                    self.output_conn[id][port] = {id1, port1, t + t1}
                 end
             end
         end
     end
     return socket
 end
+
+function network:init(batch_size, chunk_size)
+    self.batch_size = batch_size
+    self.chunk_size = chunk_size
+
+    self:topsort()
+
+    self:make_initial_store()
+    collectgarbage('collect')
+end
+
+function network:topsort()
+    nerv.info('Network topology sort')
+    local degree = {}
+    for t = 1, self.chunk_size do
+        degree[t] = {}
+        for i = 1, #self.layers do
+            degree[t][i] = 0
+        end
+    end
+
+    for t = 1, self.chunk_size do
+        for i = 1, #self.layers do
+            local _, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_out do
+                if self.output_conn[i][j] ~= nil then
+                    local edge = self.output_conn[i][j]
+                    local id, _, time = edge[1], edge[2], edge[3] + t
+                    if time >= 1 and time <= self.chunk_size and id ~= 0 then
+                        degree[time][id] = degree[time][id] + 1
+                    end
+                end
+            end
+        end
+    end
+
+    self.queue = {}
+    local l = 1
+    local r = 0
+    for t = 1, self.chunk_size do
+        for i = 1, #self.layers do
+            if degree[t][i] == 0 then
+                r = r + 1
+                self.queue[r] = {chunk = t, id = i}
+            end
+        end
+    end
+    while l <= r do
+        local t, i = self.queue[l].chunk, self.queue[l].id
+        l = l + 1
+        local _, dim_out = self.layers[i]:get_dim()
+        for j = 1, #dim_out do
+            if self.output_conn[i][j] ~= nil then
+                local edge = self.output_conn[i][j]
+                local id, _, time = edge[1], edge[2], edge[3] + t
+                if time >= 1 and time <= self.chunk_size and id ~= 0 then
+                    degree[time][id] = degree[time][id] - 1
+                    if degree[time][id] == 0 then
+                        r = r + 1
+                        self.queue[r] = {chunk = time, id = id}
+                    end
+                end
+            end
+        end
+    end
+
+    if r ~= self.chunk_size * #self.layers then
+        nerv.error('loop detected')
+    end
+end
+
+function network:make_initial_store()
+    nerv.info('Network initing storage')
+
+    -- allocate memory
+    local memory = {}
+    local err_memory = {}
+    for t = 1 - self.delay, self.chunk_size + self.delay do
+        memory[t] = {}
+        err_memory[t] = {}
+        for i = 1, #self.layers do
+            memory[t][i] = {}
+            err_memory[t][i] = {}
+            local dim_in, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_in do
+                err_memory[t][i][j] = self.mat_type(self.batch_size, dim_in[j])
+                err_memory[t][i][j]:fill(0)
+            end
+            for j = 1, #dim_out do
+                memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j])
+                memory[t][i][j]:fill(self.nn_act_default)
+            end
+        end
+        -- memory[t][0] stores network input
+        memory[t][0] = {}
+        for j = 1, #self.dim_in do
+            memory[t][0][j] = self.mat_type(self.batch_size, self.dim_in[j])
+            memory[t][0][j]:fill(self.nn_act_default)
+        end
+        -- err_memory[t][0] stores network err_input
+        err_memory[t][0] = {}
+        for j = 1, #self.dim_out do
+            err_memory[t][0][j] = self.mat_type(self.batch_size, self.dim_out[j])
+            err_memory[t][0][j]:fill(0)
+        end
+    end
+
+    -- connect memory and reference
+    self.input = {}
+    self.output = {}
+    self.err_input = {}
+    self.err_output = {}
+    for t = 1, self.chunk_size do
+        self.input[t] = {}
+        self.output[t] = {}
+        self.err_input[t] = {}
+        self.err_output[t] = {}
+        for i = 1, #self.layers do
+            self.input[t][i] = {}
+            self.output[t][i] = {}
+            self.err_input[t][i] = {}
+            self.err_output[t][i] = {}
+            local dim_in, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_in do
+                local edge = self.input_conn[i][j]
+                local id, port, time = edge[1], edge[2], edge[3]
+                if id ~= 0 or t - time < 1 or t - time > self.chunk_size then
+                    self.input[t][i][j] = memory[t - time][id][port]
+                end
+                if id ~= 0 then
+                    self.err_output[t][i][j] = err_memory[t][i][j]
+                end
+            end
+            for j = 1, #dim_out do
+                local edge = self.output_conn[i][j]
+                local id, port, time = edge[1], edge[2], edge[3]
+                if id ~= 0 then
+                    self.output[t][i][j] = memory[t][i][j]
+                end
+                if id ~= 0 or t + time < 1 or t + time > self.chunk_size then
+                    self.err_input[t][i][j] = err_memory[t + time][id][port]
+                end
+            end
+        end
+    end
+
+    -- check dangling reference
+    for t = 1, self.chunk_size do
+        for i = 1, #self.dim_in do
+            local edge = self.socket.inputs[i]
+            local id, port, time = edge[1], edge[2], edge[3]
+            if t + time >= 1 and t + time <= self.chunk_size then
+                if self.input[t + time][id][port] ~= nil then
+                    nerv.error('input reference not nil')
+                end
+                self.input[t + time][id][port] = true -- just a place holder
+                if self.err_output[t + time][id][port] ~= nil then
+                    nerv.error('err_output reference not nil')
+                end
+                self.err_output[t + time][id][port] = true -- just a place holder
+            end
+        end
+        for i = 1, #self.dim_out do
+            local edge = self.socket.outputs[i]
+            local id, port, time = edge[1], edge[2], edge[3]
+            if t - time >= 1 and t - time <= self.chunk_size then
+                if self.output[t - time][id][port] ~= nil then
+                    nerv.error('output reference not nil')
+                end
+                self.output[t - time][id][port] = true -- just a place holder
+                if self.err_input[t - time][id][port] ~= nil then
+                    nerv.error('err_output reference not nil')
+                end
+                self.err_input[t - time][id][port] = true -- just a place holder
+            end
+        end
+    end
+    for t = 1, self.chunk_size do
+        for i = 1, #self.layers do
+            local dim_in, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_in do
+                if self.input[t][i][j] == nil then
+                    nerv.error('input reference dangling')
+                end
+                if self.err_output[t][i][j] == nil then
+                    nerv.error('err_output reference dangling')
+                end
+            end
+            for j = 1, #dim_out do
+                if self.output[t][i][j] == nil then
+                    nerv.error('output reference dangling')
+                end
+                if self.err_input[t][i][j] == nil then
+                    nerv.error('err_input reference dangling')
+                end
+            end
+        end
+    end
+
+    -- allocate reference for legacy of previous mini-batch
+    self.legacy = {}
+    for t = 1 - self.delay, 0 do
+        self.legacy[t] = {}
+        for i = 1, #self.layers do
+            self.legacy[t][i] = {}
+            local _, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_out do
+                self.legacy[t][i][j] = memory[t][i][j]
+            end
+        end
+    end
+end
+
+function network:mini_batch_init(information)
+    self.info = information
+    self.max_chunk = 0
+    for i = 1, self.batch_size do
+        if self.info.seq_length[i] > self.max_chunk then
+            self.max_chunk = self.info.seq_length[i]
+        end
+    end
+    for t = 1 - self.delay, 0 do
+        for i = 1, #self.layers do
+            local _, dim_out = self.layers[i]:get_dim()
+            for j = 1, #dim_out do
+                self.output[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j])
+            end
+        end
+    end
+    for t = self.max_chunk + 1, self.max_chunk + self.delay do
+        if t > self.chunk_size then
+            break
+        end
+        for i = 1, #self.layers do
+            local dim_in, _ = self.layers[i]:get_dim()
+            for j = 1, #dim_in do
+                self.err_output[t][i][j]:fill(0)
+            end
+        end
+    end
+end
+
+function network:propagate(input, output)
+end
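
For reference, a minimal end-to-end sketch of the new nerv.Network API introduced above, assuming the global_conf, layer_repo, graph and connections from the nerv/main.lua hunk; the seq_length table passed to mini_batch_init is hypothetical (mini_batch_init only reads that field in this commit, and propagate is still a stub):

    -- Wrap the compiled graph and allocate per-chunk storage:
    -- init(batch_size, chunk_size) runs the topological sort over
    -- (time step, layer) pairs and builds the input/output/err buffers.
    local network = nerv.Network('network', global_conf, {network = graph})
    network:init(2, 5) -- batch_size = 2, chunk_size = 5, as in nerv/main.lua

    -- mini_batch_init reads info.seq_length (one length per batch element)
    -- to bound the time steps computed for the coming mini-batch.
    network:mini_batch_init({seq_length = {5, 3}})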