From 4e3cf34a29afbdb17c42ab7f1efacec52efd0e8b Mon Sep 17 00:00:00 2001
From: txh18
Date: Sun, 6 Dec 2015 19:18:07 +0800
Subject: added extend_t for tnn to save GPU memory

---
 nerv/examples/lmptb/bilstmlm_ptb_main.lua |  2 +-
 nerv/tnn/tnn.lua                          | 35 ++++++++++++++++++-------------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/nerv/examples/lmptb/bilstmlm_ptb_main.lua b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
index cf0009b..2aba1d2 100644
--- a/nerv/examples/lmptb/bilstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
@@ -364,7 +364,7 @@ else
     nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
 end
 
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua
index 56c9dc0..beb73ca 100644
--- a/nerv/tnn/tnn.lua
+++ b/nerv/tnn/tnn.lua
@@ -59,12 +59,12 @@ nerv.TNN.FC.HAS_INPUT = 1
 nerv.TNN.FC.HAS_LABEL = 2
 nerv.TNN.FC.SEQ_NORM = bit.bor(nerv.TNN.FC.HAS_INPUT, nerv.TNN.FC.HAS_LABEL) --This instance have both input and label
 
-function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c)
-    --Return a table of matrix storage from time (1-chunk_size)..(2*chunk_size)
+function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, extend_t, global_conf, st_c, p_c, t_c)
+    --Return a table of matrix storage from time (1-extend_t)..(chunk_size+extend_t)
     if (type(st) ~= "table") then
         nerv.error("st should be a table")
     end
-    for i = 1 - chunk_size - 1, chunk_size * 2 + 1 do --intentionally allocated more time, should be [1-chunk_size, chunk_size*2]
+    for i = 1 - extend_t - 1, chunk_size + extend_t + 1 do --intentionally allocated more time
         if (st[i] == nil) then
             st[i] = {}
         end
@@ -97,6 +97,13 @@ function TNN:__init(id, global_conf, layer_conf)
     if self.clip_t > 0 then
         nerv.info("tnn(%s) will clip gradient across time with %f...", id, self.clip_t)
     end
+
+    self.extend_t = layer_conf.extend_t --TNN will allocate storage of time for 1-extend_t .. chunk_size+extend_t
+    if self.extend_t == nil then
+        self.extend_t = 5
+    end
+    nerv.info("tnn(%s) will extend storage beyond MB border for time steps %d...", id, self.extend_t)
+
     local layers = {}
     local inputs_p = {} --map:port of the TNN to layer ref and port
     local outputs_p = {}
@@ -173,9 +180,9 @@ function TNN:init(batch_size, chunk_size)
 
             print("TNN initing storage", ref_from.layer.id, "->", ref_to.layer.id)
             ref_to.inputs_matbak_p[port_to] = self.gconf.cumat_type(batch_size, dim)
-            self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time)
+            self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.inputs_m, port_to, time)
             ref_from.err_inputs_matbak_p[port_from] = self.gconf.cumat_type(batch_size, dim)
-            self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time)
+            self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.err_outputs_m, port_to, time)
 
         end
 
@@ -184,8 +191,8 @@ function TNN:init(batch_size, chunk_size)
     for i = 1, #self.dim_out do --Init storage for output ports
         local ref = self.outputs_p[i].ref
         local p = self.outputs_p[i].port
-        self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0)
-        self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0)
+        self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.outputs_m, i, 0)
+        self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_inputs_m, i, 0)
     end
     self.inputs_m = {}
     self.err_outputs_m = {}
@@ -193,8 +200,8 @@ function TNN:init(batch_size, chunk_size)
     for i = 1, #self.dim_in do --Init storage for input ports
         local ref = self.inputs_p[i].ref
         local p = self.inputs_p[i].port
-        self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0)
-        self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0)
+        self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.inputs_m, i, 0)
+        self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_outputs_m, i, 0)
     end
 
     for id, ref in pairs(self.layers) do --Calling init for child layers
@@ -260,7 +267,7 @@ function TNN:flush_all() --flush all history and activation
     local _, ref
     for _, ref in pairs(self.layers) do
         for i = 1, #ref.dim_in do
-            for t = 1 - self.chunk_size, self.chunk_size * 2 do
+            for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
                 ref.inputs_m[t][i]:fill(self.gconf.nn_act_default)
                 if (ref.inputs_b[t] == nil) then
                     ref.inputs_b[t] = {}
@@ -274,7 +281,7 @@ function TNN:flush_all() --flush all history and activation
             end
         end
         for i = 1, #ref.dim_out do
-            for t = 1 - self.chunk_size, self.chunk_size * 2 do
+            for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
                 ref.outputs_m[t][i]:fill(self.gconf.nn_act_default)
                 if (ref.outputs_b[t] == nil) then
                     ref.outputs_b[t] = {}
@@ -302,13 +309,13 @@ end
 function TNN:move_right_to_nextmb(list_t) --move output history activations of 1..chunk_size to 1-chunk_size..0
     if list_t == nil then
         list_t = {}
-        for i = 1, self.chunk_size do
-            list_t[i] = i - self.chunk_size
+        for i = self.extend_t, 1, -1 do
+            list_t[i] = 1 - i
         end
     end
     for i = 1, #list_t do
         t = list_t[i]
-        if t < 1 - self.chunk_size or t > 0 then
+        if t < 1 - self.extend_t or t > 0 then
             nerv.error("MB move range error")
         end
         for id, ref in pairs(self.layers) do
-- 
cgit v1.2.3
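
The following is an illustrative sketch, not part of the patch: assuming chunk_size = 20 and the default extend_t = 5, it counts the per-time matrices that make_initial_store allocates for each connection before and after this change, to show where the GPU memory saving comes from.

-- Rough sketch with assumed values (chunk_size = 20, extend_t defaulting to 5 as in
-- TNN:__init); compares how many time steps of storage make_initial_store allocates
-- per connection before and after the extend_t change.
local chunk_size, extend_t = 20, 5

-- old range: 1 - chunk_size - 1 .. chunk_size * 2 + 1
local old_steps = (chunk_size * 2 + 1) - (1 - chunk_size - 1) + 1        -- 62 matrices

-- new range: 1 - extend_t - 1 .. chunk_size + extend_t + 1
local new_steps = (chunk_size + extend_t + 1) - (1 - extend_t - 1) + 1   -- 32 matrices

print(string.format("time steps per connection: old=%d, new=%d", old_steps, new_steps))

Each of these time steps holds a batch_size x dim matrix for both activations and error signals, so shrinking the range to extend_t steps on either side of the chunk roughly halves that part of the TNN's GPU storage at this chunk size.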