From f26288ba61d3d16866e1b227a71e7d9c46923436 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuq901@163.com>
Date: Fri, 11 Mar 2016 13:32:00 +0800
Subject: update mini_batch_init

---
 lua/config.lua      |  4 +--
 lua/main.lua        |  4 ++-
 lua/network.lua     | 30 ++++++++++++----------
 lua/reader.lua      |  3 ++-
 nerv/main.lua       | 73 -----------------------------------------------------
 nerv/nn/network.lua | 63 ++++++++++++++++++++++++++-------------------
 6 files changed, 61 insertions(+), 116 deletions(-)
 delete mode 100644 nerv/main.lua

diff --git a/lua/config.lua b/lua/config.lua
index 9d73b64..1ec1198 100644
--- a/lua/config.lua
+++ b/lua/config.lua
@@ -12,7 +12,7 @@ function get_global_conf()
         layer_num = 1,
         chunk_size = 15,
         batch_size = 20,
-        max_iter = 1,
+        max_iter = 3,
         param_random = function() return (math.random() / 5 - 0.1) end,
         dropout = 0.5,
         timer = nerv.Timer(),
@@ -34,7 +34,7 @@ function get_layers(global_conf)
             output = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.vocab_size}, pr = pr}
         },
         ['nerv.SoftmaxCELayer'] = {
-            softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}},
+            softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}, compressed = true},
         },
     }
     for i = 1, global_conf.layer_num do
diff --git a/lua/main.lua b/lua/main.lua
index 684efac..ce0270a 100644
--- a/lua/main.lua
+++ b/lua/main.lua
@@ -9,7 +9,7 @@ local timer = global_conf.timer
 timer:tic('IO')
 
 local data_path = 'nerv/nerv/examples/lmptb/PTBdata/'
-local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.train.txt.adds')
+local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds')
 local val_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds')
 
 local train_data = train_reader:get_all_batch(global_conf)
@@ -41,3 +41,5 @@ for i = 1, global_conf.max_iter do
 end
 timer:toc('global')
 timer:check('global')
+timer:check('network')
+timer:check('gc')
diff --git a/lua/network.lua b/lua/network.lua
index 6280f24..0c11321 100644
--- a/lua/network.lua
+++ b/lua/network.lua
@@ -57,12 +57,11 @@ function nn:get_data(data)
             ret[i].err_output[t][1] = err_output[t]
             ret[i].err_output[t][2] = softmax_output[t]
         end
-        ret[i].info = {}
-        ret[i].info.seq_length = data[i].seq_len
-        ret[i].info.new_seq = {}
+        ret[i].seq_length = data[i].seq_len
+        ret[i].new_seq = {}
         for j = 1, self.gconf.batch_size do
             if data[i].seq_start[j] then
-                table.insert(ret[i].info.new_seq, j)
+                table.insert(ret[i].new_seq, j)
             end
         end
     end
@@ -70,34 +69,39 @@ function nn:get_data(data)
 end
 
 function nn:process(data, do_train)
+    local timer = self.gconf.timer
     local total_err = 0
     local total_frame = 0
     for id = 1, #data do
         if do_train then
             self.gconf.dropout_rate = self.gconf.dropout
+            data[id].do_train = true
         else
             self.gconf.dropout_rate = 0
+            data[id].do_train = false
         end
-        self.network:mini_batch_init(data[id].info)
-        local input = {}
-        for t = 1, self.gconf.chunk_size do
-            input[t] = {data[id].input[t][1], data[id].input[t][2]:decompress(self.gconf.vocab_size)}
-        end
-        self.network:propagate(input, data[id].output)
+        timer:tic('network')
+        self.network:mini_batch_init(data[id])
+        self.network:propagate()
+        timer:toc('network')
         for t = 1, self.gconf.chunk_size do
             local tmp = data[id].output[t][1]:new_to_host()
             for i = 1, self.gconf.batch_size do
-                if t <= data[id].info.seq_length[i] then
+                if t <= data[id].seq_length[i] then
                     total_err = total_err + math.log10(math.exp(tmp[i - 1][0]))
                     total_frame = total_frame + 1
                 end
             end
         end
         if do_train then
-            self.network:back_propagate(data[id].err_input, data[id].err_output, input, data[id].output)
-            self.network:update(data[id].err_input, input, data[id].output)
+            timer:tic('network')
+            self.network:back_propagate()
+            self.network:update()
+            timer:toc('network')
         end
+        timer:tic('gc')
         collectgarbage('collect')
+        timer:toc('gc')
     end
     return math.pow(10, - total_err / total_frame)
 end
diff --git a/lua/reader.lua b/lua/reader.lua
index 2e51a9c..0c7bcb6 100644
--- a/lua/reader.lua
+++ b/lua/reader.lua
@@ -58,7 +58,8 @@ function Reader:get_all_batch(global_conf)
     for i = 1, global_conf.batch_size do
         pos[i] = nil
     end
-    while true do
+    --while true do
+    for i = 1, 100 do
         local input = {}
         local output = {}
         for i = 1, global_conf.chunk_size do
diff --git a/nerv/main.lua b/nerv/main.lua
deleted file mode 100644
index 7c82ebf..0000000
--- a/nerv/main.lua
+++ /dev/null
@@ -1,73 +0,0 @@
-local global_conf = {
-    cumat_type = nerv.CuMatrixFloat,
-    param_random = function() return 0 end,
-    lrate = 0.1,
-    wcost = 0,
-    momentum = 0.9,
-    batch_size = 2,
-}
-
-local layer_repo = nerv.LayerRepo(
-    {
-        ['nerv.RNNLayer'] = {
-            rnn1 = {dim_in = {23}, dim_out = {26}},
-            rnn2 = {dim_in = {26}, dim_out = {26}},
-        },
-        ['nerv.AffineLayer'] = {
-            input = {dim_in = {62}, dim_out = {23}},
-            output = {dim_in = {26, 79}, dim_out = {79}},
-        },
-        ['nerv.SigmoidLayer'] = {
-            sigmoid = {dim_in = {23}, dim_out = {23}},
-        },
-        ['nerv.IdentityLayer'] = {
-            softmax = {dim_in = {79}, dim_out = {79}},
-        },
-        ['nerv.DuplicateLayer'] = {
-            dup = {dim_in = {79}, dim_out = {79, 79}},
-        },
-    }, nerv.ParamRepo(), global_conf)
-
-local connections = {
-    {'<input>[1]', 'input[1]', 0},
-    {'input[1]', 'sigmoid[1]', 0},
-    {'sigmoid[1]', 'rnn1[1]', 0},
-    {'rnn1[1]', 'rnn2[1]', 0},
-    {'rnn2[1]', 'output[1]', 0},
-    {'output[1]', 'dup[1]', 0},
-    {'dup[1]', 'output[2]', -1},
-    {'dup[2]', 'softmax[1]', 0},
-    {'softmax[1]', '<output>[1]', 0},
-}
-
-local graph = nerv.GraphLayer('graph', global_conf, {dim_in = {62}, dim_out = {79}, layer_repo = layer_repo, connections = connections})
-
-local network = nerv.Network('network', global_conf, {network = graph})
-
-local batch = global_conf.batch_size
-local chunk = 5
-network:init(batch, chunk)
-
-local input = {}
-local output = {}
-local err_input = {}
-local err_output = {}
-local input_size = 62
-local output_size = 79
-for i = 1, chunk do
-    input[i] = {global_conf.cumat_type(batch, input_size)}
-    output[i] = {global_conf.cumat_type(batch, output_size)}
-    err_input[i] = {global_conf.cumat_type(batch, output_size)}
-    err_output[i] = {global_conf.cumat_type(batch, input_size)}
-end
-
-for i = 1, 100 do
-    network:mini_batch_init({seq_length = {5, 3}, new_seq = {2}})
-    network:propagate(input, output)
-    network:back_propagate(err_input, err_output, input, output)
-    network:update(err_input, input, output)
-end
-
-local tmp = network:get_params()
-
-tmp:export('../../workspace/test.param')
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 39df5f0..35e11e3 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -2,8 +2,9 @@ local network = nerv.class('nerv.Network')
 
 function network:__init(id, global_conf, network_conf)
     self.id = id
-    self.dim_in = network_conf.network.dim_in
-    self.dim_out = network_conf.network.dim_out
+    self.network = network_conf.network
+    self.dim_in = self.network.dim_in
+    self.dim_out = self.network.dim_out
     self.gconf = global_conf
     if self.gconf.use_cpu then
         self.mat_type = self.gconf.mmat_type
@@ -18,7 +19,6 @@ function network:__init(id, global_conf, network_conf)
     self.layers = {}
     self.input_conn = {}
     self.output_conn = {}
-    self.network = network_conf.network
     self.socket = self:compile(self.network)
     for i = 1, #self.dim_in do
         local edge = self.socket.inputs[i]
@@ -368,8 +368,21 @@ function network:set_err_output(err_output)
     end
 end
 
-function network:mini_batch_init(information)
-    self.info = information
+--[[
+    [info] is a table that contains information about the current mini-batch. It must contain these fields:
+        [input], [output]: matrix arrays which store the network input and output
+        [seq_length]: a table containing the length of every sequence
+        [new_seq]: a table containing the batch indices of new sequences
+        [do_train]: a boolean value indicating whether to train or not
+        If [do_train] is true, these fields must also be contained:
+            [err_input], [err_output]: matrix arrays which store the network err_input and err_output
+--]]
+function network:mini_batch_init(info)
+    self.info = info
+    self:set_input(self.info.input)
+    self:set_output(self.info.output)
+
+    -- calculate border
     self.max_length = 0
     self.border = {}
     for i = 1, self.chunk_size do
@@ -387,6 +400,7 @@ function network:mini_batch_init(information)
             table.insert(self.border[chunk], i)
         end
     end
+
     -- copy legacy
     for t = 1 - self.delay, 0 do
         for i = 1, #self.layers do
@@ -402,23 +416,27 @@ function network:mini_batch_init(information)
             end
         end
     end
-    -- flush border gradient
-    for t = self.max_length + 1, self.max_length + self.delay do
-        if t > self.chunk_size then
-            break
-        end
-        for i = 1, #self.layers do
-            local dim_in, _ = self.layers[i]:get_dim()
-            for j = 1, #dim_in do
-                self.err_output[t][i][j]:fill(0)
+
+    if self.info.do_train then
+        self:set_err_input(self.info.err_input)
+        self:set_err_output(self.info.err_output)
+        
+        -- flush border gradient
+        for t = self.max_length + 1, self.max_length + self.delay do
+            if t > self.chunk_size then
+                break
+            end
+            for i = 1, #self.layers do
+                local dim_in, _ = self.layers[i]:get_dim()
+                for j = 1, #dim_in do
+                    self.err_output[t][i][j]:fill(0)
+                end
             end
         end
     end
 end
 
-function network:propagate(input, output)
-    self:set_input(input)
-    self:set_output(output)
+function network:propagate()
     for i = 1, #self.queue do
         local t, id = self.queue[i].chunk, self.queue[i].id
         if t <= self.max_length then
@@ -435,11 +453,7 @@ function network:propagate(input, output)
     end
 end
 
-function network:back_propagate(bp_err, next_bp_err, input, output)
-    self:set_input(input)
-    self:set_output(output)
-    self:set_err_input(bp_err)
-    self:set_err_output(next_bp_err)
+function network:back_propagate()
     for i = #self.queue, 1, -1 do
         local t, id = self.queue[i].chunk, self.queue[i].id
         if t <= self.max_length then
@@ -462,10 +476,7 @@ function network:back_propagate(bp_err, next_bp_err, input, output)
     end
 end
 
-function network:update(bp_err, input, output)
-    self:set_input(input)
-    self:set_output(output)
-    self:set_err_input(bp_err)
+function network:update()
     for i = 1, #self.queue do
         local t, id = self.queue[i].chunk, self.queue[i].id
         if t <= self.max_length then
-- 
cgit v1.2.3-70-g09d2