From 4e3cf34a29afbdb17c42ab7f1efacec52efd0e8b Mon Sep 17 00:00:00 2001
From: txh18
Date: Sun, 6 Dec 2015 19:18:07 +0800
Subject: added extend_t for tnn to save GPU memory

---
 nerv/examples/lmptb/bilstmlm_ptb_main.lua |  2 +-
 nerv/tnn/tnn.lua                          | 35 ++++++++++++++++++-------------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/nerv/examples/lmptb/bilstmlm_ptb_main.lua b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
index cf0009b..2aba1d2 100644
--- a/nerv/examples/lmptb/bilstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/bilstmlm_ptb_main.lua
@@ -364,7 +364,7 @@ else
     nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
 end
 
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua
index 56c9dc0..beb73ca 100644
--- a/nerv/tnn/tnn.lua
+++ b/nerv/tnn/tnn.lua
@@ -59,12 +59,12 @@ nerv.TNN.FC.HAS_INPUT = 1
 nerv.TNN.FC.HAS_LABEL = 2
 nerv.TNN.FC.SEQ_NORM = bit.bor(nerv.TNN.FC.HAS_INPUT, nerv.TNN.FC.HAS_LABEL) --This instance have both input and label
 
-function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, global_conf, st_c, p_c, t_c)
-    --Return a table of matrix storage from time (1-chunk_size)..(2*chunk_size)
+function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, extend_t, global_conf, st_c, p_c, t_c)
+    --Return a table of matrix storage from time (1-extend_t)..(chunk_size+extend_t)
     if (type(st) ~= "table") then
         nerv.error("st should be a table")
     end
-    for i = 1 - chunk_size - 1, chunk_size * 2 + 1 do --intentionally allocated more time, should be [1-chunk_size, chunk_size*2]
+    for i = 1 - extend_t - 1, chunk_size + extend_t + 1 do --intentionally allocated more time
         if (st[i] == nil) then
             st[i] = {}
         end
@@ -97,6 +97,13 @@ function TNN:__init(id, global_conf, layer_conf)
     if self.clip_t > 0 then
         nerv.info("tnn(%s) will clip gradient across time with %f...", id, self.clip_t)
     end
+
+    self.extend_t = layer_conf.extend_t --TNN will allocate storage of time for 1-extend_t .. chunk_size+extend_t
+    if self.extend_t == nil then
+        self.extend_t = 5
+    end
+    nerv.info("tnn(%s) will extend storage beyond MB border for time steps %d...", id, self.extend_t)
+
     local layers = {}
     local inputs_p = {} --map:port of the TNN to layer ref and port
     local outputs_p = {}
@@ -173,9 +180,9 @@ function TNN:init(batch_size, chunk_size)
 
             print("TNN initing storage", ref_from.layer.id, "->", ref_to.layer.id)
             ref_to.inputs_matbak_p[port_to] = self.gconf.cumat_type(batch_size, dim)
-            self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.inputs_m, port_to, time)
+            self.make_initial_store(ref_from.outputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.inputs_m, port_to, time)
             ref_from.err_inputs_matbak_p[port_from] = self.gconf.cumat_type(batch_size, dim)
-            self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.gconf, ref_to.err_outputs_m, port_to, time)
+            self.make_initial_store(ref_from.err_inputs_m, port_from, dim, batch_size, chunk_size, self.extend_t, self.gconf, ref_to.err_outputs_m, port_to, time)
 
         end
 
@@ -184,8 +191,8 @@ function TNN:init(batch_size, chunk_size)
     for i = 1, #self.dim_out do --Init storage for output ports
         local ref = self.outputs_p[i].ref
         local p = self.outputs_p[i].port
-        self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.outputs_m, i, 0)
-        self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.gconf, self.err_inputs_m, i, 0)
+        self.make_initial_store(ref.outputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.outputs_m, i, 0)
+        self.make_initial_store(ref.err_inputs_m, p, self.dim_out[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_inputs_m, i, 0)
     end
     self.inputs_m = {}
     self.err_outputs_m = {}
@@ -193,8 +200,8 @@ function TNN:init(batch_size, chunk_size)
     for i = 1, #self.dim_in do --Init storage for input ports
         local ref = self.inputs_p[i].ref
         local p = self.inputs_p[i].port
-        self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.inputs_m, i, 0)
-        self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.gconf, self.err_outputs_m, i, 0)
+        self.make_initial_store(ref.inputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.inputs_m, i, 0)
+        self.make_initial_store(ref.err_outputs_m, p, self.dim_in[i], batch_size, chunk_size, self.extend_t, self.gconf, self.err_outputs_m, i, 0)
     end
 
     for id, ref in pairs(self.layers) do --Calling init for child layers
@@ -260,7 +267,7 @@ function TNN:flush_all() --flush all history and activation
     local _, ref
     for _, ref in pairs(self.layers) do
         for i = 1, #ref.dim_in do
-            for t = 1 - self.chunk_size, self.chunk_size * 2 do
+            for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
                 ref.inputs_m[t][i]:fill(self.gconf.nn_act_default)
                 if (ref.inputs_b[t] == nil) then
                     ref.inputs_b[t] = {}
@@ -274,7 +281,7 @@ function TNN:flush_all() --flush all history and activation
             end
         end
         for i = 1, #ref.dim_out do
-            for t = 1 - self.chunk_size, self.chunk_size * 2 do
+            for t = 1 - self.extend_t, self.chunk_size + self.extend_t do
                 ref.outputs_m[t][i]:fill(self.gconf.nn_act_default)
                 if (ref.outputs_b[t] == nil) then
                     ref.outputs_b[t] = {}
@@ -302,13 +309,13 @@ end
 function TNN:move_right_to_nextmb(list_t) --move output history activations of 1..chunk_size to 1-chunk_size..0
     if list_t == nil then
         list_t = {}
-        for i = 1, self.chunk_size do
-            list_t[i] = i - self.chunk_size
+        for i = self.extend_t, 1, -1 do
+            list_t[i] = 1 - i
         end
     end
     for i = 1, #list_t do
         t = list_t[i]
-        if t < 1 - self.chunk_size or t > 0 then
+        if t < 1 - self.extend_t or t > 0 then
             nerv.error("MB move range error")
         end
         for id, ref in pairs(self.layers) do
-- 
cgit v1.2.3
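
The following is an illustrative sketch, not part of the patch: assuming chunk_size = 20 and the default extend_t = 5, it counts the per-time matrices that make_initial_store allocates for each connection before and after this change, to show where the GPU memory saving comes from.

-- Rough sketch with assumed values (chunk_size = 20, extend_t defaulting to 5 as in
-- TNN:__init); compares how many time steps of storage make_initial_store allocates
-- per connection before and after the extend_t change.
local chunk_size, extend_t = 20, 5

-- old range: 1 - chunk_size - 1 .. chunk_size * 2 + 1
local old_steps = (chunk_size * 2 + 1) - (1 - chunk_size - 1) + 1        -- 62 matrices

-- new range: 1 - extend_t - 1 .. chunk_size + extend_t + 1
local new_steps = (chunk_size + extend_t + 1) - (1 - extend_t - 1) + 1   -- 32 matrices

print(string.format("time steps per connection: old=%d, new=%d", old_steps, new_steps))

Each of these time steps holds a batch_size x dim matrix for both activations and error signals, so shrinking the range to extend_t steps on either side of the chunk roughly halves that part of the TNN's GPU storage at this chunk size.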