aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--nerv/examples/lmptb/lstmlm_ptb_main.lua9
-rw-r--r--nerv/examples/lmptb/m-tests/sutil_test.lua4
-rw-r--r--nerv/examples/lmptb/tnn/init.lua1
-rw-r--r--nerv/examples/lmptb/tnn/layersT/dropout_t.lua71
-rw-r--r--nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua16
-rw-r--r--nerv/examples/lmptb/tnn/sutil.lua51
6 files changed, 148 insertions, 4 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 24db06c..4c46369 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -84,6 +84,10 @@ function prepare_layers(global_conf)
["nerv.LSTMLayerT"] = {
["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}},
},
+
+ ["nerv.DropoutLayerT"] = {
+ ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}},
+ },
["nerv.SelectLinearLayer"] = {
["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
@@ -137,7 +141,8 @@ function prepare_tnn(global_conf, layerRepo)
{"selectL1[1]", "lstmL1[1]", 0},
{"lstmL1[2]", "lstmL1[3]", 1},
- {"lstmL1[1]", "combinerL1[1]", 0},
+ {"lstmL1[1]", "dropoutL1[1]", 0},
+ {"dropoutL1[1]", "combinerL1[1]", 0},
{"combinerL1[1]", "lstmL1[2]", 1},
{"combinerL1[2]", "outputL[1]", 0},
@@ -279,7 +284,7 @@ else
printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
end
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/examples/lmptb/m-tests/sutil_test.lua b/nerv/examples/lmptb/m-tests/sutil_test.lua
new file mode 100644
index 0000000..c2425c2
--- /dev/null
+++ b/nerv/examples/lmptb/m-tests/sutil_test.lua
@@ -0,0 +1,4 @@
+require "tnn.init"
+
+-- Smoke test: feed a sample schedule string ("value*repeat:value") to the parser.
+-- `ss` is declared local so the script does not leak a global.
+local ss = "0.1*1:2"
+nerv.SUtil.parse_schedule(ss)
diff --git a/nerv/examples/lmptb/tnn/init.lua b/nerv/examples/lmptb/tnn/init.lua
index ddaa6b8..66ea4ed 100644
--- a/nerv/examples/lmptb/tnn/init.lua
+++ b/nerv/examples/lmptb/tnn/init.lua
@@ -45,6 +45,7 @@ nerv.include('sutil.lua')
nerv.include('tnn.lua')
nerv.include('layersT/softmax_ce_t.lua')
nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/dropout_t.lua')
nerv.include('layers/elem_mul.lua')
nerv.include('layers/gate_fff.lua')
nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/tnn/layersT/dropout_t.lua b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua
new file mode 100644
index 0000000..4351285
--- /dev/null
+++ b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua
@@ -0,0 +1,71 @@
+-- Dropout layer for the time-unrolled network; declared with "nerv.LayerT" as parent class name.
+local Dropout = nerv.class("nerv.DropoutLayerT", "nerv.LayerT")
+
+--- Constructor: records the layer id, the global configuration and the port dimensions.
+-- @param id identifier of this layer
+-- @param global_conf global configuration table, kept as self.gconf
+-- @param layer_conf layer configuration; its dim_in and dim_out fields are stored on the layer
+function Dropout:__init(id, global_conf, layer_conf)
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.id = id
+    self.gconf = global_conf
+    -- one input port and one output port
+    -- (presumably check_dim_len validates #dim_in and #dim_out against these counts -- confirm in the base class)
+    self:check_dim_len(1, 1)
+end
+
+--- Validates the port dimensions and allocates one mask matrix per time step.
+-- Raises through nerv.error when the first input and output dimensions differ.
+-- @param batch_size first argument passed to the matrix constructor (row count of each mask)
+-- @param chunk_size number of time steps to allocate masks for; nil is treated as 1
+function Dropout:init(batch_size, chunk_size)
+    local width = self.dim_in[1]
+    if width ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+    local steps = chunk_size
+    if steps == nil then
+        steps = 1
+    end
+    -- masks are stored per time step in self.mask_t
+    local masks = {}
+    self.mask_t = masks
+    for step = 1, steps do
+        masks[step] = self.gconf.cumat_type(batch_size, width)
+    end
+end
+
+--- Re-allocates the per-step masks whose row count no longer matches the batch size.
+-- Masks that are missing for a time step are allocated as well.
+-- @param batch_size required row count of every mask
+-- @param chunk_size number of time steps to check; nil is treated as 1
+function Dropout:batch_resize(batch_size, chunk_size)
+    local steps = chunk_size
+    if steps == nil then
+        steps = 1
+    end
+    local masks = self.mask_t
+    for step = 1, steps do
+        local mask = masks[step]
+        -- a mask can be kept only if it exists and already has batch_size rows
+        local reusable = mask ~= nil and mask:nrow() == batch_size
+        if not reusable then
+            masks[step] = self.gconf.cumat_type(batch_size, self.dim_in[1])
+        end
+    end
+end
+
+--- Forward pass for time step t: writes the (possibly masked) input into output[1].
+-- A nil global dropout_rate is logged and then stored as 0 in the global configuration.
+-- @param input table of input matrices; only input[1] is read
+-- @param output table of output matrices; only output[1] is written
+-- @param t time step index selecting the mask; nil is treated as 1
+function Dropout:propagate(input, output, t)
+    local step = t
+    if step == nil then
+        step = 1
+    end
+    local gconf = self.gconf
+    if gconf.dropout_rate == nil then
+        nerv.info("DropoutLayerT:propagate warning, global_conf.dropout_rate is nil, setting it zero")
+        gconf.dropout_rate = 0
+    end
+
+    local rate = gconf.dropout_rate
+    if rate == 0 then
+        -- dropout disabled: pass the input through unchanged
+        output[1]:copy_fromd(input[1])
+        return
+    end
+
+    local mask = self.mask_t[step]
+    mask:rand_uniform()
+    -- Part of the activations is dropped, so the kept ones are scaled by 1/(1-rate) to compensate.
+    -- (presumably thres_mask maps entries below `rate` to 0 and the others to the scale -- confirm against the matrix API)
+    mask:thres_mask(mask, rate, 0, 1 / (1.0 - rate))
+    output[1]:mul_elem(input[1], mask)
+end
+
+--- Parameter update hook; intentionally a no-op for this layer.
+-- All arguments are ignored.
+function Dropout:update(bp_err, input, output, t)
+    -- the layer owns no parameters, so there is nothing to update
+end
+
+--- Backward pass for time step t: writes the (possibly masked) error into next_bp_err[1].
+-- Uses the mask stored in self.mask_t[t], so the error is scaled by the same mask as the
+-- activations of that step.
+-- @param bp_err table of incoming error matrices; only bp_err[1] is read
+-- @param next_bp_err table of outgoing error matrices; only next_bp_err[1] is written
+-- @param input unused
+-- @param output unused
+-- @param t time step index selecting the mask; nil is treated as 1
+function Dropout:back_propagate(bp_err, next_bp_err, input, output, t)
+    if t == nil then
+        t = 1
+    end
+    -- propagate() treats a nil dropout_rate as 0; do the same here so that a missing
+    -- rate never falls through to the mask branch with a mask that was never sampled.
+    local rate = self.gconf.dropout_rate
+    if rate == nil or rate == 0 then
+        next_bp_err[1]:copy_fromd(bp_err[1])
+    else
+        next_bp_err[1]:mul_elem(bp_err[1], self.mask_t[t])
+    end
+end
+
+--- Returns the parameters of this layer: a nerv.ParamRepo built from an empty table.
+function Dropout:get_params()
+    local no_params = {}
+    return nerv.ParamRepo(no_params)
+end
diff --git a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
index dddb05a..a9ce975 100644
--- a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
+++ b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
@@ -16,6 +16,9 @@ function SoftmaxCELayer:init(batch_size, chunk_size)
if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
nerv.error("mismatching dimensions of previous network output and labels")
end
+ if chunk_size == nil then
+ chunk_size = 1
+ end
self.total_ce = 0.0
self.total_correct = 0
self.total_frames = 0
@@ -27,9 +30,12 @@ function SoftmaxCELayer:init(batch_size, chunk_size)
end
end
-function SoftmaxCELayer:batch_resize(batch_size)
+function SoftmaxCELayer:batch_resize(batch_size, chunk_size)
+ if chunk_size == nil then
+ chunk_size = 1
+ end
for t = 1, chunk_size do
- if self.softmax_t[t]:nrow() ~= batch_resize then
+ if self.softmax_t[t]:nrow() ~= batch_size then
self.softmax_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1])
self.ce_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1])
end
@@ -41,6 +47,9 @@ function SoftmaxCELayer:update(bp_err, input, output, t)
end
function SoftmaxCELayer:propagate(input, output, t)
+ if t == nil then
+ t = 1
+ end
local softmax = self.softmax_t[t]
local ce = self.ce_t[t]
local classified = softmax:softmax(input[1])
@@ -65,6 +74,9 @@ end
function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
-- softmax output - label
+ if t == nil then
+ t = 1
+ end
local label = input[2]
if self.compressed then
label = label:decompress(input[1]:ncol())
diff --git a/nerv/examples/lmptb/tnn/sutil.lua b/nerv/examples/lmptb/tnn/sutil.lua
new file mode 100644
index 0000000..d157a26
--- /dev/null
+++ b/nerv/examples/lmptb/tnn/sutil.lua
@@ -0,0 +1,51 @@
+local Util = nerv.class("nerv.SUtil") --Scheduler Utility
+
+--- Splits a string into its non-empty pieces.
+-- @param inputstr string to split
+-- @param sep separator; it is inserted verbatim into a Lua pattern character class, so
+--            pattern-magic characters must already be escaped by the caller (e.g. "%*").
+--            Defaults to "%s" (whitespace) when nil.
+-- @return array of the pieces in order of appearance; empty pieces are skipped
+function Util.simple_split(inputstr, sep)
+    if sep == nil then
+        sep = "%s"
+    end
+    -- the counter used to be an accidental global (`i`); appending by length needs no counter
+    local pieces = {}
+    for piece in string.gmatch(inputstr, "([^" .. sep .. "]+)") do
+        pieces[#pieces + 1] = piece
+    end
+    return pieces
+end
+
+--- Parses a schedule string such as "1.2*10:1.5" into a flat list of numbers.
+-- Units are separated by ':'; each unit is either "value" or "value*count", where count
+-- says how many times value is repeated. "1.2*10:1.5" yields ten 1.2 entries followed by 1.5.
+-- Raises through nerv.error on a malformed or non-numeric unit.
+-- @param str schedule string
+-- @return array of numbers (the expanded schedule)
+function Util.parse_schedule(str)
+    local sch = {}
+    local units = Util.simple_split(str, ':')
+    for i = 1, #units do
+        local p = Util.simple_split(units[i], "%*")
+        if #p ~= 2 and #p ~= 1 then
+            nerv.error("nerv.SUtil:parse_schedule error, unit(%s) not proper, has %d components.", units[i], #p)
+        end
+        local value = tonumber(p[1])
+        -- a unit without "*count" is repeated once
+        local count = tonumber(p[2] or "1")
+        if value == nil or count == nil then
+            -- tonumber returns nil for non-numeric text; fail here instead of in the loop below
+            nerv.error("nerv.SUtil:parse_schedule error, unit(%s) is not numeric.", units[i])
+        end
+        for _ = 1, count do
+            sch[#sch + 1] = value
+        end
+    end
+
+    -- kept from the original: the existing test script only observes this printed output
+    for i = 1, #sch do
+        print(sch[i])
+    end
+    -- the parsed list was previously built but never handed back to the caller
+    return sch
+end
+
+--- Looks up entry `it` of a schedule, clamping to the last entry when `it` is past the end.
+-- @param s schedule: array of numbers, or nil
+-- @param it 1-based index into the schedule
+-- @return s[it] when it <= #s; the last entry when it > #s; 0 when s is nil or empty
+function Util.sche_get(s, it)
+    if s == nil then
+        nerv.info("Util.sche_get: warning, scheule is nil, returning zero...")
+        return 0
+    end
+    local n = #s
+    if n == 0 then
+        -- an empty schedule has no last entry; formatting s[#s] (nil) with %f below would raise
+        nerv.info("Util.sche_get: warning, schedule is empty, returning zero...")
+        return 0
+    end
+    if n >= it then
+        return s[it]
+    end
+    nerv.info("Util.sche_get: warning, it(%d) > #schedule(%d), returning the last one of schedule(%f)...", it, n, s[n])
+    return s[n]
+end