added dropout_t layer

author: txh18 <cloudygooseg@gmail.com> 2015-12-02 18:00:47 +0800
committer: txh18 <cloudygooseg@gmail.com> 2015-12-02 18:00:47 +0800
commit: 094fc872d3e62c5f0950ac1747f130e30a08bee8 (patch)
tree: 2bb0c9df22c5899d9af4062f16c11261f23302dd
parent: 41a841f3e0992a578cf5c8f82ae44a552a6f8b2f (diff)
6 files changed, 148 insertions, 4 deletions
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 24db06c..4c46369 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -84,6 +84,10 @@ function prepare_layers(global_conf)
         ["nerv.LSTMLayerT"] = {
             ["lstmL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}, 
         },
+        
+        ["nerv.DropoutLayerT"] = {
+            ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, 
+        },
 
         ["nerv.SelectLinearLayer"] = {
             ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
@@ -137,7 +141,8 @@ function prepare_tnn(global_conf, layerRepo)
         
         {"selectL1[1]", "lstmL1[1]", 0},
         {"lstmL1[2]", "lstmL1[3]", 1},
-        {"lstmL1[1]", "combinerL1[1]", 0},
+        {"lstmL1[1]", "dropoutL1[1]", 0},
+        {"dropoutL1[1]", "combinerL1[1]", 0},
         {"combinerL1[1]", "lstmL1[2]", 1},
 
         {"combinerL1[2]", "outputL[1]", 0},
@@ -279,7 +284,7 @@ else
     printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
 end
 
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' ..   global_conf.lrate .. 'wc' .. global_conf.wcost
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' ..   global_conf.lrate .. 'wc' .. global_conf.wcost
 global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
 global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
 global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/examples/lmptb/m-tests/sutil_test.lua b/nerv/examples/lmptb/m-tests/sutil_test.lua
new file mode 100644
index 0000000..c2425c2
--- /dev/null
+++ b/nerv/examples/lmptb/m-tests/sutil_test.lua
@@ -0,0 +1,4 @@
+require "tnn.init"
+
+local ss = "0.1*1:2" --schedule string: the value 0.1 once, then the value 2
+nerv.SUtil.parse_schedule(ss)
diff --git a/nerv/examples/lmptb/tnn/init.lua b/nerv/examples/lmptb/tnn/init.lua
index ddaa6b8..66ea4ed 100644
--- a/nerv/examples/lmptb/tnn/init.lua
+++ b/nerv/examples/lmptb/tnn/init.lua
@@ -45,6 +45,7 @@ nerv.include('sutil.lua')
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
 nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/dropout_t.lua')
 nerv.include('layers/elem_mul.lua')
 nerv.include('layers/gate_fff.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/tnn/layersT/dropout_t.lua b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua
new file mode 100644
index 0000000..4351285
--- /dev/null
+++ b/nerv/examples/lmptb/tnn/layersT/dropout_t.lua
@@ -0,0 +1,71 @@
+local Dropout = nerv.class("nerv.DropoutLayerT", "nerv.LayerT")
+
+function Dropout:__init(id, global_conf, layer_conf) -- store the layer id, the global config and the configured dimensions
+    self.id = id
+    self.gconf = global_conf -- dropout_rate and cumat_type are read from this table later
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1) -- presumably enforces exactly one input and one output port (check_dim_len is defined elsewhere; confirm)
+end
+
+function Dropout:init(batch_size, chunk_size) -- allocate one mask matrix per time step
+    if self.dim_in[1] ~= self.dim_out[1] then -- the mask is applied element-wise, so input and output widths must agree
+        nerv.error("mismatching dimensions of input and output")
+    end
+    if chunk_size == nil then
+        chunk_size = 1 -- no chunk size given: use a single time step
+    end
+    self.mask_t = {} -- mask_t[t] holds the mask used at time step t
+    for t = 1, chunk_size do
+        self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) -- constructed with (batch_size, dim_in[1])
+    end
+end
+
+function Dropout:batch_resize(batch_size, chunk_size) -- re-allocate any mask whose row count differs from batch_size
+    if chunk_size == nil then
+        chunk_size = 1 -- no chunk size given: use a single time step
+    end
+    for t = 1, chunk_size do
+        if self.mask_t[t] == nil or self.mask_t[t]:nrow() ~= batch_size then -- the nil check also creates masks for steps not allocated yet
+            self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1])
+        end
+    end
+end
+
+function Dropout:propagate(input, output, t) -- forward pass for time step t (t defaults to 1)
+    if t == nil then
+        t = 1
+    end
+    if self.gconf.dropout_rate == nil then
+        nerv.info("DropoutLayerT:propagate warning, global_conf.dropout_rate is nil, setting it zero")
+        self.gconf.dropout_rate = 0 -- note: this writes into the global_conf table handed to the constructor
+    end
+    
+    if self.gconf.dropout_rate == 0 then
+        output[1]:copy_fromd(input[1]) -- dropout disabled: no mask is applied
+    else
+        self.mask_t[t]:rand_uniform()
+        --since we will lose a portion of the activations, we multiply the activations by 1/(1-dr) to compensate
+        self.mask_t[t]:thres_mask(self.mask_t[t], self.gconf.dropout_rate, 0, 1 / (1.0 - self.gconf.dropout_rate))
+        output[1]:mul_elem(input[1], self.mask_t[t]) -- the mask stays in mask_t[t], where back_propagate reads it
+    end
+end
+
+function Dropout:update(bp_err, input, output, t) -- intentionally empty; all arguments are ignored
+    -- no params, therefore do nothing
+end
+
+function Dropout:back_propagate(bp_err, next_bp_err, input, output, t) -- backward pass for time step t (t defaults to 1)
+    if t == nil then
+        t = 1
+    end
+    if self.gconf.dropout_rate == 0 then -- NOTE(review): a nil dropout_rate takes the masked branch; propagate sets nil to 0 when it runs first
+        next_bp_err[1]:copy_fromd(bp_err[1])
+    else
+        next_bp_err[1]:mul_elem(bp_err[1], self.mask_t[t]) -- uses the mask stored in mask_t[t] for this step
+    end
+end
+
+function Dropout:get_params() -- this layer keeps no parameters of its own
+    return nerv.ParamRepo({}) -- a ParamRepo built from an empty table
+end
diff --git a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
index dddb05a..a9ce975 100644
--- a/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
+++ b/nerv/examples/lmptb/tnn/layersT/softmax_ce_t.lua
@@ -16,6 +16,9 @@ function SoftmaxCELayer:init(batch_size, chunk_size)
     if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
         nerv.error("mismatching dimensions of previous network output and labels")
     end
+    if chunk_size == nil then
+        chunk_size = 1
+    end
     self.total_ce = 0.0
     self.total_correct = 0
     self.total_frames = 0
@@ -27,9 +30,12 @@ function SoftmaxCELayer:init(batch_size, chunk_size)
     end
 end
 
-function SoftmaxCELayer:batch_resize(batch_size)
+function SoftmaxCELayer:batch_resize(batch_size, chunk_size)
+    if chunk_size == nil then
+        chunk_size = 1
+    end
     for t = 1, chunk_size do
-        if self.softmax_t[t]:nrow() ~= batch_resize then
+        if self.softmax_t[t]:nrow() ~= batch_size then
             self.softmax_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1])
             self.ce_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1])
         end
@@ -41,6 +47,9 @@ function SoftmaxCELayer:update(bp_err, input, output, t)
 end
 
 function SoftmaxCELayer:propagate(input, output, t)
+    if t == nil then
+        t = 1
+    end
     local softmax = self.softmax_t[t]
     local ce = self.ce_t[t]
     local classified = softmax:softmax(input[1])
@@ -65,6 +74,9 @@ end
 
 function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
     -- softmax output - label
+    if t == nil then
+        t = 1
+    end
     local label = input[2]
     if self.compressed then
         label = label:decompress(input[1]:ncol())
diff --git a/nerv/examples/lmptb/tnn/sutil.lua b/nerv/examples/lmptb/tnn/sutil.lua
new file mode 100644
index 0000000..d157a26
--- /dev/null
+++ b/nerv/examples/lmptb/tnn/sutil.lua
@@ -0,0 +1,51 @@
+local Util = nerv.class("nerv.SUtil") --Scheduler Utility
+
+function Util.simple_split(inputstr, sep) --returns the list of non-empty pieces of inputstr found between separators
+    if sep == nil then
+        sep = "%s" --default: split on whitespace
+    end
+    local t, i = {}, 1 --keep the index local so it does not leak into the global environment
+    for str in string.gmatch(inputstr, "([^"..sep.."]+)") do --sep is spliced into a character class, so callers escape magic characters (e.g. "%*")
+        t[i] = str
+        i = i + 1
+    end
+    return t
+end
+
+function Util.parse_schedule(str)
+    --parse a string like "1.2*10:1.5" to a list of numbers (here: 1.2 ten times, then 1.5) and return that list
+    local sch = {}
+    local s = Util.simple_split(str, ':')
+    for i = 1, #s do
+        local p = Util.simple_split(s[i], "%*") --each unit is "value" or "value*count"
+        if #p ~= 2 and #p ~= 1 then
+            nerv.error("nerv.SUtil:parse_schedule error, unit(%s) not proper, has %d components.", s[i], #p)
+        end
+        local value, count = tonumber(p[1]), tonumber(p[2] or "1") --a missing count means the value appears once
+        if value == nil or count == nil then --fail loudly instead of handing nil to the loop below
+            nerv.error("nerv.SUtil:parse_schedule error, unit(%s) is not a number or number*count.", s[i])
+        end
+        for j = 1, count do
+            table.insert(sch, value)
+        end
+    end
+
+    for i = 1, #sch do
+        print(sch[i]) --debug listing, kept so the printed output is unchanged
+    end
+    return sch --hand the parsed schedule back to the caller
+end
+
+function Util.sche_get(s, it) --look up entry number `it` of schedule list s
+    --get s[it]
+    if s == nil then
+        nerv.info("Util.sche_get: warning, scheule is nil, returning zero...")
+        return 0
+    end
+    if #s >= it then
+        return s[it]
+    else --`it` is past the end of the schedule: fall back to the last entry
+        nerv.info("Util.sche_get: warning, it(%d) > #schedule(%d), returning the last one of schedule(%f)...", it, #s, s[#s])
+        return s[#s] --NOTE(review): this is nil when s is an empty table
+    end
+end
author	txh18 <cloudygooseg@gmail.com>	2015-12-02 18:00:47 +0800
committer	txh18 <cloudygooseg@gmail.com>	2015-12-02 18:00:47 +0800
commit	094fc872d3e62c5f0950ac1747f130e30a08bee8 (patch)
tree	2bb0c9df22c5899d9af4062f16c11261f23302dd
parent	41a841f3e0992a578cf5c8f82ae44a552a6f8b2f (diff)