diff options
Diffstat (limited to 'nerv/tnn/layersT')
-rw-r--r-- | nerv/tnn/layersT/dropout_t.lua | 71 | ||||
-rw-r--r-- | nerv/tnn/layersT/gru_t.lua | 114 | ||||
-rw-r--r-- | nerv/tnn/layersT/lstm_t.lua | 124 | ||||
-rw-r--r-- | nerv/tnn/layersT/softmax_ce_t.lua | 93 |
4 files changed, 0 insertions, 402 deletions
diff --git a/nerv/tnn/layersT/dropout_t.lua b/nerv/tnn/layersT/dropout_t.lua deleted file mode 100644 index 4351285..0000000 --- a/nerv/tnn/layersT/dropout_t.lua +++ /dev/null @@ -1,71 +0,0 @@ -local Dropout = nerv.class("nerv.DropoutLayerT", "nerv.LayerT") - -function Dropout:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self:check_dim_len(1, 1) -- two inputs: nn output and label -end - -function Dropout:init(batch_size, chunk_size) - if self.dim_in[1] ~= self.dim_out[1] then - nerv.error("mismatching dimensions of input and output") - end - if chunk_size == nil then - chunk_size = 1 - end - self.mask_t = {} - for t = 1, chunk_size do - self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - end -end - -function Dropout:batch_resize(batch_size, chunk_size) - if chunk_size == nil then - chunk_size = 1 - end - for t = 1, chunk_size do - if self.mask_t[t] == nil or self.mask_t[t]:nrow() ~= batch_size then - self.mask_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - end - end -end - -function Dropout:propagate(input, output, t) - if t == nil then - t = 1 - end - if self.gconf.dropout_rate == nil then - nerv.info("DropoutLayerT:propagate warning, global_conf.dropout_rate is nil, setting it zero") - self.gconf.dropout_rate = 0 - end - - if self.gconf.dropout_rate == 0 then - output[1]:copy_fromd(input[1]) - else - self.mask_t[t]:rand_uniform() - --since we will lose a portion of the actvations, we multiply the activations by 1/(1-dr) to compensate - self.mask_t[t]:thres_mask(self.mask_t[t], self.gconf.dropout_rate, 0, 1 / (1.0 - self.gconf.dropout_rate)) - output[1]:mul_elem(input[1], self.mask_t[t]) - end -end - -function Dropout:update(bp_err, input, output, t) - -- no params, therefore do nothing -end - -function Dropout:back_propagate(bp_err, next_bp_err, input, output, t) - if t == nil then - t = 1 - end - if self.gconf.dropout_rate == 0 then - next_bp_err[1]:copy_fromd(bp_err[1]) - else - next_bp_err[1]:mul_elem(bp_err[1], self.mask_t[t]) - end -end - -function Dropout:get_params() - return nerv.ParamRepo({}) -end diff --git a/nerv/tnn/layersT/gru_t.lua b/nerv/tnn/layersT/gru_t.lua deleted file mode 100644 index 8f15cc8..0000000 --- a/nerv/tnn/layersT/gru_t.lua +++ /dev/null @@ -1,114 +0,0 @@ -local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT') - -function GRULayerT:__init(id, global_conf, layer_conf) - --input1:x input2:h input3:c(h^~) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - - if self.dim_in[2] ~= self.dim_out[1] then - nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1]) - end - - --prepare a DAGLayerT to hold the lstm structure - local pr = layer_conf.pr - if pr == nil then - pr = nerv.ParamRepo() - end - - local function ap(str) - return self.id .. '.' .. str - end - - local layers = { - ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, - ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, - [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, - ["lambda"] = {1, -1, 1}}}, - }, - ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}}, - }, - ["nerv.TanhLayer"] = { - [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}}, - }, - ["nerv.GateFLayer"] = { - [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}}, - [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}}, - }, - ["nerv.ElemMulLayer"] = { - [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - }, - } - - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) - - local connections_t = { - ["<input>[1]"] = ap("inputXDup[1]"), - ["<input>[2]"] = ap("inputHDup[1]"), - - [ap("inputXDup[1]")] = ap("resetGateL[1]"), - [ap("inputHDup[1]")] = ap("resetGateL[2]"), - [ap("inputXDup[2]")] = ap("updateGateL[1]"), - [ap("inputHDup[2]")] = ap("updateGateL[2]"), - [ap("updateGateL[1]")] = ap("updateGDup[1]"), - - [ap("resetGateL[1]")] = ap("resetGMulL[1]"), - [ap("inputHDup[3]")] = ap("resetGMulL[2]"), - - [ap("inputXDup[3]")] = ap("mainAffineL[1]"), - [ap("resetGMulL[1]")] = ap("mainAffineL[2]"), - [ap("mainAffineL[1]")] = ap("mainTanhL[1]"), - - [ap("updateGDup[1]")] = ap("updateGMulHL[1]"), - [ap("inputHDup[4]")] = ap("updateGMulHL[2]"), - [ap("updateGDup[2]")] = ap("updateGMulCL[1]"), - [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"), - - [ap("inputHDup[5]")] = ap("updateMergeL[1]"), - [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"), - [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"), - - [ap("updateMergeL[1]")] = "<output>[1]", - } - - self.dagL = nerv.DAGLayerT(self.id, global_conf, - {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, - ["connections"] = connections_t}) - - self:check_dim_len(2, 1) -- x, h and h -end - -function GRULayerT:init(batch_size, chunk_size) - self.dagL:init(batch_size, chunk_size) -end - -function GRULayerT:batch_resize(batch_size, chunk_size) - self.dagL:batch_resize(batch_size, chunk_size) -end - -function GRULayerT:update(bp_err, input, output, t) - self.dagL:update(bp_err, input, output, t) -end - -function GRULayerT:propagate(input, output, t) - self.dagL:propagate(input, output, t) -end - -function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t) - self.dagL:back_propagate(bp_err, next_bp_err, input, output, t) -end - -function GRULayerT:get_params() - return self.dagL:get_params() -end diff --git a/nerv/tnn/layersT/lstm_t.lua b/nerv/tnn/layersT/lstm_t.lua deleted file mode 100644 index 04d0600..0000000 --- a/nerv/tnn/layersT/lstm_t.lua +++ /dev/null @@ -1,124 +0,0 @@ -local LSTMLayerT = nerv.class('nerv.LSTMLayerT', 'nerv.LayerT') - -function LSTMLayerT:__init(id, global_conf, layer_conf) - --input1:x input2:h input3:c - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - - --prepare a DAGLayerT to hold the lstm structure - local pr = layer_conf.pr - if pr == nil then - pr = nerv.ParamRepo() - end - - local function ap(str) - return self.id .. '.' .. str - end - - local layers = { - ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, - ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, - [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}}, - [ap("mainCDup")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, - ["lambda"] = {1, 1}}}, - }, - ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}}, - }, - ["nerv.TanhLayer"] = { - [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}}, - [ap("outputTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}}, - }, - ["nerv.GateFLayer"] = { - [ap("forgetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, - [ap("inputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, - [ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]}, - ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}}, - - }, - ["nerv.ElemMulLayer"] = { - [ap("inputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, - [ap("forgetGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, - [ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}}, - }, - } - - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) - - local connections_t = { - ["<input>[1]"] = ap("inputXDup[1]"), - ["<input>[2]"] = ap("inputHDup[1]"), - ["<input>[3]"] = ap("inputCDup[1]"), - - [ap("inputXDup[1]")] = ap("mainAffineL[1]"), - [ap("inputHDup[1]")] = ap("mainAffineL[2]"), - [ap("mainAffineL[1]")] = ap("mainTanhL[1]"), - - [ap("inputXDup[2]")] = ap("inputGateL[1]"), - [ap("inputHDup[2]")] = ap("inputGateL[2]"), - [ap("inputCDup[1]")] = ap("inputGateL[3]"), - - [ap("inputXDup[3]")] = ap("forgetGateL[1]"), - [ap("inputHDup[3]")] = ap("forgetGateL[2]"), - [ap("inputCDup[2]")] = ap("forgetGateL[3]"), - - [ap("mainTanhL[1]")] = ap("inputGMulL[1]"), - [ap("inputGateL[1]")] = ap("inputGMulL[2]"), - - [ap("inputCDup[3]")] = ap("forgetGMulL[1]"), - [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"), - - [ap("inputGMulL[1]")] = ap("mainCDup[1]"), - [ap("forgetGMulL[1]")] = ap("mainCDup[2]"), - - [ap("inputXDup[4]")] = ap("outputGateL[1]"), - [ap("inputHDup[4]")] = ap("outputGateL[2]"), - [ap("mainCDup[3]")] = ap("outputGateL[3]"), - - [ap("mainCDup[2]")] = "<output>[2]", - [ap("mainCDup[1]")] = ap("outputTanhL[1]"), - - [ap("outputTanhL[1]")] = ap("outputGMulL[1]"), - [ap("outputGateL[1]")] = ap("outputGMulL[2]"), - - [ap("outputGMulL[1]")] = "<output>[1]", - } - self.dagL = nerv.DAGLayerT(self.id, global_conf, - {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, - ["connections"] = connections_t}) - - self:check_dim_len(3, 2) -- x, h, c and h, c -end - -function LSTMLayerT:init(batch_size, chunk_size) - self.dagL:init(batch_size, chunk_size) -end - -function LSTMLayerT:batch_resize(batch_size, chunk_size) - self.dagL:batch_resize(batch_size, chunk_size) -end - -function LSTMLayerT:update(bp_err, input, output, t) - self.dagL:update(bp_err, input, output, t) -end - -function LSTMLayerT:propagate(input, output, t) - self.dagL:propagate(input, output, t) -end - -function LSTMLayerT:back_propagate(bp_err, next_bp_err, input, output, t) - self.dagL:back_propagate(bp_err, next_bp_err, input, output, t) -end - -function LSTMLayerT:get_params() - return self.dagL:get_params() -end diff --git a/nerv/tnn/layersT/softmax_ce_t.lua b/nerv/tnn/layersT/softmax_ce_t.lua deleted file mode 100644 index a9ce975..0000000 --- a/nerv/tnn/layersT/softmax_ce_t.lua +++ /dev/null @@ -1,93 +0,0 @@ -local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayerT", "nerv.LayerT") - -function SoftmaxCELayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.compressed = layer_conf.compressed - if self.compressed == nil then - self.compressed = false - end - self:check_dim_len(2, -1) -- two inputs: nn output and label -end - -function SoftmaxCELayer:init(batch_size, chunk_size) - if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then - nerv.error("mismatching dimensions of previous network output and labels") - end - if chunk_size == nil then - chunk_size = 1 - end - self.total_ce = 0.0 - self.total_correct = 0 - self.total_frames = 0 - self.softmax_t = {} - self.ce_t = {} - for t = 1, chunk_size do - self.softmax_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.ce_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - end -end - -function SoftmaxCELayer:batch_resize(batch_size, chunk_size) - if chunk_size == nil then - chunk_size = 1 - end - for t = 1, chunk_size do - if self.softmax_t[t]:nrow() ~= batch_size then - self.softmax_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.ce_t[t] = self.gconf.cumat_type(batch_size, self.dim_in[1]) - end - end -end - -function SoftmaxCELayer:update(bp_err, input, output, t) - -- no params, therefore do nothing -end - -function SoftmaxCELayer:propagate(input, output, t) - if t == nil then - t = 1 - end - local softmax = self.softmax_t[t] - local ce = self.ce_t[t] - local classified = softmax:softmax(input[1]) - local label = input[2] - ce:log_elem(softmax) - if self.compressed then - label = label:decompress(input[1]:ncol()) - end - ce:mul_elem(ce, label) - ce = ce:rowsum() - if output[1] ~= nil then - output[1]:copy_fromd(ce) - end - -- add total ce - self.total_ce = self.total_ce - ce:colsum()[0][0] - self.total_frames = self.total_frames + softmax:nrow() - -- TODO: add colsame for uncompressed label - if self.compressed then - self.total_correct = self.total_correct + classified:colsame(input[2])[0][0] - end -end - -function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t) - -- softmax output - label - if t == nil then - t = 1 - end - local label = input[2] - if self.compressed then - label = label:decompress(input[1]:ncol()) - end - local nbe = next_bp_err[1] - nbe:add(self.softmax_t[t], label, 1.0, -1.0) - if bp_err[1] ~= nil then - nbe:scale_rows_by_col(bp_err[1]) - end -end - -function SoftmaxCELayer:get_params() - return nerv.ParamRepo({}) -end |