diff options
Diffstat (limited to 'nerv/tnn/layersT/softmax_ce_t.lua')
-rw-r--r-- | nerv/tnn/layersT/softmax_ce_t.lua | 93 |
1 files changed, 93 insertions, 0 deletions
-- nerv/tnn/layersT/softmax_ce_t.lua
--
-- Softmax + cross-entropy layer for time-unrolled networks (class
-- "nerv.SoftmaxCELayerT", derived from "nerv.LayerT").
--
-- The layer takes two inputs: input[1] is the previous network output and
-- input[2] is the label. It keeps one softmax buffer and one cross-entropy
-- buffer per time step, and accumulates running totals in
-- self.total_ce, self.total_correct and self.total_frames.
local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayerT", "nerv.LayerT")

-- (Re)allocate the per-time-step work buffers for step `t`.
-- Both buffers are created with `batch_size` rows and `dim_in[1]` columns
-- using the matrix constructor configured in `gconf.cumat_type`.
local function alloc_step_buffers(layer, t, batch_size)
    local ncol = layer.dim_in[1]
    layer.softmax_t[t] = layer.gconf.cumat_type(batch_size, ncol)
    layer.ce_t[t] = layer.gconf.cumat_type(batch_size, ncol)
end

-- Constructor.
--   id          : layer identifier, stored as self.id
--   global_conf : global configuration, stored as self.gconf
--   layer_conf  : table providing dim_in, dim_out and the optional boolean
--                 `compressed` (treated as false when absent)
function SoftmaxCELayer:__init(id, global_conf, layer_conf)
    self.id = id
    self.gconf = global_conf
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    self.compressed = layer_conf.compressed
    -- Explicit nil test (not `or false`) so that only a missing option is
    -- defaulted.
    if self.compressed == nil then
        self.compressed = false
    end
    self:check_dim_len(2, -1) -- two inputs: nn output and label
end

-- Validate dimensions, reset the running statistics and allocate the
-- per-time-step buffers.
--   batch_size : number of rows of every buffer
--   chunk_size : number of time steps to allocate (defaults to 1)
-- Raises via nerv.error when labels are not compressed and the two input
-- dimensions differ.
function SoftmaxCELayer:init(batch_size, chunk_size)
    if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
        nerv.error("mismatching dimensions of previous network output and labels")
    end
    if chunk_size == nil then
        chunk_size = 1
    end
    self.total_ce = 0.0
    self.total_correct = 0
    self.total_frames = 0
    self.softmax_t = {}
    self.ce_t = {}
    for t = 1, chunk_size do
        alloc_step_buffers(self, t, batch_size)
    end
end

-- Make sure the buffers of time steps 1..chunk_size have `batch_size` rows.
--   batch_size : required number of rows
--   chunk_size : number of time steps to check (defaults to 1)
-- A step whose buffers do not exist yet (chunk_size larger than the one
-- passed to init) is allocated instead of failing on a nil index.
function SoftmaxCELayer:batch_resize(batch_size, chunk_size)
    if chunk_size == nil then
        chunk_size = 1
    end
    for t = 1, chunk_size do
        local softmax = self.softmax_t[t]
        if softmax == nil or softmax:nrow() ~= batch_size then
            alloc_step_buffers(self, t, batch_size)
        end
    end
end

-- Parameter update hook.
function SoftmaxCELayer:update(bp_err, input, output, t)
    -- no params, therefore do nothing
end

-- Forward pass for time step `t` (defaults to 1).
--   input[1]  : previous network output
--   input[2]  : label (decompressed to input[1]:ncol() columns first when
--               self.compressed is set)
--   output[1] : optional; when non-nil it receives the row sums of
--               log(softmax) * label via copy_fromd
-- Side effects: overwrites the step's softmax buffer and updates
-- total_ce, total_frames and (compressed labels only) total_correct.
function SoftmaxCELayer:propagate(input, output, t)
    if t == nil then
        t = 1
    end
    local softmax = self.softmax_t[t]
    local ce = self.ce_t[t]
    -- The return value is later compared against the compressed labels with
    -- colsame; presumably it holds the predicted class per row -- confirm
    -- against the matrix API.
    local classified = softmax:softmax(input[1])
    local label = input[2]
    ce:log_elem(softmax)
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    ce:mul_elem(ce, label)
    -- From here on `ce` refers to the rowsum result, not the step buffer.
    ce = ce:rowsum()
    if output[1] ~= nil then
        output[1]:copy_fromd(ce)
    end
    -- add total ce (subtracted because `ce` holds log(softmax) * label)
    self.total_ce = self.total_ce - ce:colsum()[0][0]
    self.total_frames = self.total_frames + softmax:nrow()
    -- TODO: add colsame for uncompressed label
    if self.compressed then
        self.total_correct = self.total_correct + classified:colsame(input[2])[0][0]
    end
end

-- Backward pass for time step `t` (defaults to 1).
-- Writes (softmax output - label) into next_bp_err[1]; when bp_err[1] is
-- given, the result is additionally passed through
-- scale_rows_by_col(bp_err[1]).
-- Relies on self.softmax_t[t] having been filled by propagate for the same
-- time step.
function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
    -- softmax output - label
    if t == nil then
        t = 1
    end
    local label = input[2]
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    local nbe = next_bp_err[1]
    nbe:add(self.softmax_t[t], label, 1.0, -1.0)
    if bp_err[1] ~= nil then
        nbe:scale_rows_by_col(bp_err[1])
    end
end

-- This layer has no trainable parameters; return an empty repository.
function SoftmaxCELayer:get_params()
    return nerv.ParamRepo({})
end