aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author txh18 <cloudygooseg@gmail.com> 2015-12-06 19:18:07 +0800
committer txh18 <cloudygooseg@gmail.com> 2015-12-06 19:18:07 +0800
commit 4e3cf34a29afbdb17c42ab7f1efacec52efd0e8b (patch)
tree 9da189094ac9964ba4566802e1bb496e13ca64ab
parent 313011f24dfdacfe9c18d018d5bb877625a09ec7 (diff)
added extend_t for tnn to save GPU memory
-rw-r--r-- nerv/examples/lmptb/bilstmlm_ptb_main.lua 2
-rw-r--r-- nerv/tnn/tnn.lua 35
2 files changed, 22 insertions, 15 deletions
diff --git a/nerv/examples/lmptb/bilstmlm_ptb_main.lua b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
index cf0009b..2aba1d2 100644
--- a/nerv/examples/lmptb/bilstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
@@ -364,7 +364,7 @@ else
nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
end
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua
index 56c9dc0..beb73ca 100644
--- a/nerv/tnn/tnn.lua
+++ b/nerv/tnn/tnn.lua
@@ -59,12 +59,12 @@ nerv.TNN.FC.HAS_INPUT = 1
nerv.TNN.FC.HAS_LABEL = 2
nerv.TNN.FC.SEQ_NORM = bit.bor(nerv.TNN.FC.HAS_INPUT, nerv.TNN.FC.HAS_LABEL) --This instance have both input and label
-function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c)
- --Return a table of matrix storage from time (1-chunk_size)..(2*chunk_size)
+function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, extend_t, global_conf, st_c, p_c, t_c)
+ --Return a table of matrix storage from time (1-extend_t)..(chunk_size+extend_t)
if (type(st) ~= "table") then
nerv.error("st should be a table")
end
- for i = 1 - chunk_size - 1, chunk_size * 2 + 1 do --intentionally allocated more time, should be [1-chunk_size, chunk_size*2]
+ for i = 1 - extend_t - 1, chunk_size + extend_t + 1 do --intentionally allocated more time
if (st[i] == nil) then
st[i] = {}
end
@@ -97,6 +97,13 @@ function TNN:__init(id, global_conf, layer_conf)
if self.clip_t > 0 then
nerv.info("tnn(%s) will clip gradient across time with %f...", id, self.clip_t)
end
+
+ self.extend_t = layer_conf.extend_t --TNN will allocate storage of time for 1-extend_t .. chunk_size+extend_t
+ if self.extend_t == nil then
+ self.extend_t = 5
+ end
+ nerv.info("tnn(%s) will extend storage beyond MB border for time steps %d...", id, self.extend_t)
+
local layers = {}
local inputs_p = {} --map:port of the TNN to layer ref and port
local outputs_p = {}
@@ -173,9 +180,9 @@ function TNN:init(batch_size, chunk_size)
print("TNN initing storage", ref_from.layer.id, "->", ref_to.layer.id)
ref_to.inputs_matbak_p[port_to] = self.gconf.cumat_type(batch_size, dim)
- self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time)
+ self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.inputs_m, port_to, time)
ref_from.err_inputs_matbak_p[port_from] = self.gconf.cumat_type(batch_size, dim)
- self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time)
+ self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.err_outputs_m, port_to, time)
end
@@ -184,8 +191,8 @@ function TNN:init(batch_size, chunk_size)
for i = 1, #self.dim_out do --Init storage for output ports
local ref = self.outputs_p[i].ref
local p = self.outputs_p[i].port
- self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0)
- self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0)
+ self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.outputs_m, i, 0)
+ self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_inputs_m, i, 0)
end
self.inputs_m = {}
@@ -193,8 +200,8 @@ function TNN:init(batch_size, chunk_size)
for i = 1, #self.dim_in do --Init storage for input ports
local ref = self.inputs_p[i].ref
local p = self.inputs_p[i].port
- self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0)
- self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0)
+ self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.inputs_m, i, 0)
+ self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_outputs_m, i, 0)
end
for id, ref in pairs(self.layers) do --Calling init for child layers
@@ -260,7 +267,7 @@ function TNN:flush_all() --flush all history and activation
local _, ref
for _, ref in pairs(self.layers) do
for i = 1, #ref.dim_in do
- for t = 1 - self.chunk_size, self.chunk_size * 2 do
+ for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
ref.inputs_m[t][i]:fill(self.gconf.nn_act_default)
if (ref.inputs_b[t] == nil) then
ref.inputs_b[t] = {}
@@ -274,7 +281,7 @@ function TNN:flush_all() --flush all history and activation
end
end
for i = 1, #ref.dim_out do
- for t = 1 - self.chunk_size, self.chunk_size * 2 do
+ for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
ref.outputs_m[t][i]:fill(self.gconf.nn_act_default)
if (ref.outputs_b[t] == nil) then
ref.outputs_b[t] = {}
@@ -302,13 +309,13 @@ end
function TNN:move_right_to_nextmb(list_t) --move output history activations of 1..chunk_size to 1-chunk_size..0
if list_t == nil then
list_t = {}
- for i = 1, self.chunk_size do
- list_t[i] = i - self.chunk_size
+ for i = self.extend_t, 1, -1 do
+ list_t[i] = 1 - i
end
end
for i = 1, #list_t do
t = list_t[i]
- if t < 1 - self.chunk_size or t > 0 then
+ if t < 1 - self.extend_t or t > 0 then
nerv.error("MB move range error")
end
for id, ref in pairs(self.layers) do