diff options
-rw-r--r-- | nerv/Makefile | 2 | ||||
-rw-r--r-- | nerv/layer/duplicate.lua | 4 | ||||
-rw-r--r-- | nerv/layer/identity.lua | 2 | ||||
-rw-r--r-- | nerv/layer/init.lua | 1 | ||||
-rw-r--r-- | nerv/main.lua | 36 | ||||
-rw-r--r-- | nerv/nn/network.lua | 32 |
6 files changed, 52 insertions, 25 deletions
diff --git a/nerv/Makefile b/nerv/Makefile index 0a2aa86..a9b4baf 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -34,7 +34,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua layer/duplicate.lua\ + layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index fbd4a9e..1a93b26 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -30,9 +30,9 @@ function DuplicateLayer:propagate(input, output) end function DuplicateLayer:back_propagate(bp_err, next_bp_err) - next_bp_err:copy_from(bp_err[1]) + next_bp_err[1]:copy_from(bp_err[1]) for i = 2, #self.dim_out do - next_bp_err:add(next_bp_err, bp_err[i], 1.0, 1.0) + next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0) end end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua index dc796fb..aeeff89 100644 --- a/nerv/layer/identity.lua +++ b/nerv/layer/identity.lua @@ -22,7 +22,7 @@ function IdentityLayer:propagate(input, output) end function IdentityLayer:back_propagate(bp_err, next_bp_err) - next_bp_err[1]:copy_from(bp_err) + next_bp_err[1]:copy_from(bp_err[1]) end function IdentityLayer:update() diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 6f26d4d..39f97b1 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -126,6 +126,7 @@ nerv.include('dropout.lua') nerv.include('gru.lua') nerv.include('rnn.lua') nerv.include('duplicate.lua') +nerv.include('identity.lua') -- The following lines are for backward compatibility, and will be removed in -- the future. The use of these names are deprecated. diff --git a/nerv/main.lua b/nerv/main.lua index 5cb7d07..865aba0 100644 --- a/nerv/main.lua +++ b/nerv/main.lua @@ -1,8 +1,10 @@ -print 'Hello' - local global_conf = { cumat_type = nerv.CuMatrixFloat, param_random = function() return 0 end, + lrate = 0.1, + wcost = 0, + momentum = 0.9, + batch_size = 2, } local layer_repo = nerv.LayerRepo( @@ -11,13 +13,13 @@ local layer_repo = nerv.LayerRepo( rnn = {dim_in = {23}, dim_out = {26}}, }, ['nerv.AffineLayer'] = { - input = {dim_in = {20}, dim_out = {23}}, + input = {dim_in = {62}, dim_out = {23}}, output = {dim_in = {26, 79}, dim_out = {79}}, }, ['nerv.SigmoidLayer'] = { sigmoid = {dim_in = {23}, dim_out = {23}}, }, - ['nerv.SoftmaxLayer'] = { + ['nerv.IdentityLayer'] = { softmax = {dim_in = {79}, dim_out = {79}}, }, ['nerv.DuplicateLayer'] = { @@ -36,8 +38,30 @@ local connections = { {'softmax[1]', '<output>[1]', 0}, } -local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {20}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) +local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {62}, dim_out = {79}, layer_repo = layer_repo, connections = connections}) local network = nerv.Network('network', global_conf, {network = graph}) -network:init(2,5) +local batch = global_conf.batch_size +local chunk = 5 +network:init(batch, chunk) + +local input = {} +local output = {} +local err_input = {} +local err_output = {} +local input_size = 62 +local output_size = 79 +for i = 1, chunk do + input[i] = {global_conf.cumat_type(batch, input_size)} + output[i] = {global_conf.cumat_type(batch, output_size)} + err_input[i] = {global_conf.cumat_type(batch, output_size)} + err_output[i] = {global_conf.cumat_type(batch, input_size)} +end + +for i = 1, 100 do + network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}}) + network:propagate(input, output) + network:back_propagate(err_input, err_output, input, output) + network:update(err_input, input, output) +end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 3cf052b..0bbcc59 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -320,7 +320,7 @@ function network:make_initial_store() end function network:set_input(input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_in do local edge = self.socket.inputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -332,7 +332,7 @@ function network:set_input(input) end function network:set_output(output) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -344,7 +344,7 @@ function network:set_output(output) end function network:set_err_input(err_input) - for t = 1, #self.chunk_size do + for t = 1, self.chunk_size do for i = 1, #self.dim_out do local edge = self.socket.outputs[i] local id, port, time = edge[1], edge[2], edge[3] @@ -391,7 +391,9 @@ function network:mini_batch_init(information) for i = 1, #self.layers do local _, dim_out = self.layers[i]:get_dim() for j = 1, #dim_out do - self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + if t + self.chunk_size >= 1 and self.output_conn[i][j][1] ~= 0 then + self.legacy[t][i][j]:copy_from(self.output[t + self.chunk_size][i][j]) + end for k = 1, #self.info.new_seq do local batch = self.info.new_seq[k] self.legacy[t][i][j][batch - 1]:fill(self.nn_act_default) @@ -414,8 +416,8 @@ function network:mini_batch_init(information) end function network:propagate(input, output) - network:set_input(input) - network:set_output(output) + self:set_input(input) + self:set_output(output) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then @@ -433,18 +435,18 @@ function network:propagate(input, output) end function network:back_propagate(bp_err, next_bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) - network:set_err_output(next_bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) + self:set_err_output(next_bp_err) for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then -- flush border gradient for j = 1, #self.border[t] do local batch = self.border[t][j] - local dim_in, _ = self.layers[id]:get_dim() - for k = 1, #dim_in do + local _, dim_out = self.layers[id]:get_dim() + for k = 1, #dim_out do self.err_input[t][id][k][batch - 1]:fill(0) end end @@ -460,9 +462,9 @@ function network:back_propagate(bp_err, next_bp_err, input, output) end function network:update(bp_err, input, output) - network:set_input(input) - network:set_output(output) - network:set_err_input(bp_err) + self:set_input(input) + self:set_output(output) + self:set_err_input(bp_err) for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then |