diff options
-rw-r--r-- | nerv/examples/lmptb/lstmlm_ptb_main.lua | 9 | ||||
-rw-r--r-- | nerv/examples/lmptb/m-tests/sutil_test.lua | 4 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn/init.lua | 1 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn/layersT/dropout_t.lua | 71 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua | 16 | ||||
-rw-r--r-- | nerv/examples/lmptb/tnn/sutil.lua | 51 |
6 files changed, 148 insertions, 4 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua index 24db06c..4c46369 100644 --- a/nerv/examples/lmptb/lstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua @@ -84,6 +84,10 @@ function prepare_layers(global_conf) ["nerv.LSTMLayerT"] = { ["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}, }, + + ["nerv.DropoutLayerT"] = { + ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, + }, ["nerv.SelectLinearLayer"] = { ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}}, @@ -137,7 +141,8 @@ function prepare_tnn(global_conf, layerRepo) {"selectL1[1]", "lstmL1[1]", 0}, {"lstmL1[2]", "lstmL1[3]", 1}, - {"lstmL1[1]", "combinerL1[1]", 0}, + {"lstmL1[1]", "dropoutL1[1]", 0}, + {"dropoutL1[1]", "combinerL1[1]", 0}, {"combinerL1[1]", "lstmL1[2]", 1}, {"combinerL1[2]", "outputL[1]", 0}, @@ -279,7 +284,7 @@ else printf("%s no user setting, all default...\n", global_conf.sche_log_pre) end -global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost +global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf' global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak' global_conf.param_fn = global_conf.work_dir .. 
"/params" diff --git a/nerv/examples/lmptb/m-tests/sutil_test.lua b/nerv/examples/lmptb/m-tests/sutil_test.lua new file mode 100644 index 0000000..c2425c2 --- /dev/null +++ b/nerv/examples/lmptb/m-tests/sutil_test.lua @@ -0,0 +1,4 @@ +require "tnn.init" + +ss = "0.1*1:2" +nerv.SUtil.parse_schedule(ss) diff --git a/nerv/examples/lmptb/tnn/init.lua b/nerv/examples/lmptb/tnn/init.lua index ddaa6b8..66ea4ed 100644 --- a/nerv/examples/lmptb/tnn/init.lua +++ b/nerv/examples/lmptb/tnn/init.lua @@ -45,6 +45,7 @@ nerv.include('sutil.lua') nerv.include('tnn.lua') nerv.include('layersT/softmax_ce_t.lua') nerv.include('layersT/lstm_t.lua') +nerv.include('layersT/dropout_t.lua') nerv.include('layers/elem_mul.lua') nerv.include('layers/gate_fff.lua') nerv.include('layer_dag_t.lua') diff --git a/nerv/examples/lmptb/tnn/layersT/dropout_t.lua b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua new file mode 100644 index 0000000..4351285 --- /dev/null +++ b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua @@ -0,0 +1,71 @@ +local Dropout = nerv.class("nerv.DropoutLayerT", "nerv.LayerT") + +function Dropout:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) -- two inputs: nn output and label +end + +function Dropout:init(batch_size, chunk_size) + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error("mismatching dimensions of input and output") + end + if chunk_size == nil then + chunk_size = 1 + end + self.mask_t = {} + for t = 1, chunk_size do + self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) + end +end + +function Dropout:batch_resize(batch_size, chunk_size) + if chunk_size == nil then + chunk_size = 1 + end + for t = 1, chunk_size do + if self.mask_t[t] == nil or self.mask_t[t]:nrow() ~= batch_size then + self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) + end + end +end + +function Dropout:propagate(input, output, t) + if t == 
nil then + t = 1 + end + if self.gconf.dropout_rate == nil then + nerv.info("DropoutLayerT:propagate warning, global_conf.dropout_rate is nil, setting it zero") + self.gconf.dropout_rate = 0 + end + + if self.gconf.dropout_rate == 0 then + output[1]:copy_fromd(input[1]) + else + self.mask_t[t]:rand_uniform() + --since we will lose a portion of the activations, we multiply the activations by 1/(1-dr) to compensate + self.mask_t[t]:thres_mask(self.mask_t[t], self.gconf.dropout_rate, 0, 1 / (1.0 - self.gconf.dropout_rate)) + output[1]:mul_elem(input[1], self.mask_t[t]) + end +end + +function Dropout:update(bp_err, input, output, t) + -- no params, therefore do nothing +end + +function Dropout:back_propagate(bp_err, next_bp_err, input, output, t) + if t == nil then + t = 1 + end + if self.gconf.dropout_rate == 0 then + next_bp_err[1]:copy_fromd(bp_err[1]) + else + next_bp_err[1]:mul_elem(bp_err[1], self.mask_t[t]) + end +end + +function Dropout:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua index dddb05a..a9ce975 100644 --- a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua +++ b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua @@ -16,6 +16,9 @@ function SoftmaxCELayer:init(batch_size, chunk_size) if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then nerv.error("mismatching dimensions of previous network output and labels") end + if chunk_size == nil then + chunk_size = 1 + end self.total_ce = 0.0 self.total_correct = 0 self.total_frames = 0 @@ -27,9 +30,12 @@ function SoftmaxCELayer:init(batch_size, chunk_size) end end -function SoftmaxCELayer:batch_resize(batch_size) +function SoftmaxCELayer:batch_resize(batch_size, chunk_size) + if chunk_size == nil then + chunk_size = 1 + end for t = 1, chunk_size do - if self.softmax_t[t]:nrow() ~= batch_resize then + if self.softmax_t[t]:nrow() ~= batch_size then self.softmax_t[t] = 
self.gconf.cumat_type(batch_size, self.dim_in[1]) self.ce_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) end @@ -41,6 +47,9 @@ function SoftmaxCELayer:update(bp_err, input, output, t) end function SoftmaxCELayer:propagate(input, output, t) + if t == nil then + t = 1 + end local softmax = self.softmax_t[t] local ce = self.ce_t[t] local classified = softmax:softmax(input[1]) @@ -65,6 +74,9 @@ end function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t) -- softmax output - label + if t == nil then + t = 1 + end local label = input[2] if self.compressed then label = label:decompress(input[1]:ncol()) diff --git a/nerv/examples/lmptb/tnn/sutil.lua b/nerv/examples/lmptb/tnn/sutil.lua new file mode 100644 index 0000000..d157a26 --- /dev/null +++ b/nerv/examples/lmptb/tnn/sutil.lua @@ -0,0 +1,51 @@ +local Util = nerv.class("nerv.SUtil") --Scheduler Utility + +function Util.simple_split(inputstr, sep) + if sep == nil then + sep = "%s" + end + local t={} ; i=1 + for str in string.gmatch(inputstr, "([^"..sep.."]+)") do + t[i] = str + i = i + 1 + end + return t +end + +function Util.parse_schedule(str) + --parse a string like "1.2*10:1.5" to a list of numbers + local sch = {} + local s = Util.simple_split(str, ':') + for i = 1, #s do + local p = Util.simple_split(s[i], "%*") + if #p ~= 2 and #p ~= 1 then + nerv.error("nerv.SUtil:parse_schedule error, unit(%s) not proper, has %d components.", s[i], #p) + end + if p[2] == nil then + p[2] = "1" + end + p[1] = tonumber(p[1]) + p[2] = tonumber(p[2]) + for j = 1, p[2] do + table.insert(sch, p[1]) + end + end + + for i = 1, #sch do + print(sch[i]) + end +end + +function Util.sche_get(s, it) + --get s[it] + if s == nil then + nerv.info("Util.sche_get: warning, scheule is nil, returning zero...") + return 0 + end + if #s >= it then + return s[it] + else + nerv.info("Util.sche_get: warning, it(%d) > #schedule(%d), returning the last one of schedule(%f)...", it, #s, s[#s]) + return s[#s] + end +end |