-- SoftmaxCELayer: fuses a softmax activation with the cross-entropy loss.
-- It takes two inputs (the network output and the labels, possibly in
-- compressed form) and accumulates total CE, frame and correct-frame counts.
local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")

function SoftmaxCELayer:__init(id, global_conf, layer_conf)
    nerv.Layer.__init(self, id, global_conf, layer_conf)
    self.compressed = layer_conf.compressed
    if self.compressed == nil then
        self.compressed = false
    end
    self:check_dim_len(2, -1) -- two inputs: nn output and label
end

function SoftmaxCELayer:bind_params()
    -- do nothing: this layer has no trainable parameters
end

function SoftmaxCELayer:init(batch_size, chunk_size)
    if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
        nerv.error("mismatching dimensions of previous network output and labels")
    end
    if chunk_size == nil then
        chunk_size = 1
    end
    self.total_ce = 0.0
    self.total_correct = 0
    self.total_frames = 0
    -- per-timestep buffers for the softmax output and the CE values
    self.softmax = {}
    self.ce = {}
    for t = 1, chunk_size do
        self.softmax[t] = self.mat_type(batch_size, self.dim_in[1])
        self.ce[t] = self.softmax[t]:create()
    end
end

function SoftmaxCELayer:batch_resize(batch_size, chunk_size)
    if chunk_size == nil then
        chunk_size = 1
    end
    for t = 1, chunk_size do
        if self.softmax[t]:nrow() ~= batch_size then
            self.softmax[t] = self.mat_type(batch_size, self.dim_in[1])
            self.ce[t] = self.softmax[t]:create()
        end
    end
end

function SoftmaxCELayer:update(bp_err, input, output, t)
    -- no params, therefore do nothing
end

function SoftmaxCELayer:propagate(input, output, t)
    if t == nil then
        t = 1
    end
    local softmax = self.softmax[t]
    local ce = self.ce[t]
    local classified = softmax:softmax(input[1])
    local label = input[2]
    -- CE per frame: -sum(label .* log(softmax))
    ce:log_elem(softmax)
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    ce:mul_elem(ce, label)
    ce = ce:rowsum()
    ce:set_values_by_mask(self.gconf.mask[t], 0)
    if output[1] ~= nil then
        output[1]:copy_from(ce)
    end
    -- accumulate total CE and the number of unmasked frames
    self.total_ce = self.total_ce - ce:colsum()[0][0]
    self.total_frames = self.total_frames + self.gconf.mask[t]:colsum()[0][0]
    -- TODO: add colsame for uncompressed label
    if self.compressed then
        classified:set_values_by_mask(self.gconf.mask[t], -1)
        self.total_correct = self.total_correct + classified:colsame(input[2])[0][0]
    end
end

function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
    -- gradient of softmax + CE w.r.t. the input: softmax output - label
    if t == nil then
        t = 1
    end
    local label = input[2]
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    local nbe = next_bp_err[1]
    nbe:add(self.softmax[t], label, 1.0, -1.0)
    if bp_err[1] ~= nil then
        nbe:scale_rows_by_col(bp_err[1])
    end
end

function SoftmaxCELayer:get_params()
    return nerv.ParamRepo({}, self.loc_type)
end
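
-- Usage sketch (illustrative only, not part of this layer's code): how such a
-- layer might be constructed and driven. The layer id, dimensions, gconf
-- fields and the input/output table shapes below are assumptions.
--
--     local layer = nerv.SoftmaxCELayer("softmax_ce0", gconf,
--                       {dim_in = {429, 429}, dim_out = {1}})
--     layer:init(gconf.batch_size, gconf.chunk_size)
--     layer:propagate(input, output, t)        -- accumulates total_ce / total_frames
--     layer:back_propagate(bp_err, next_bp_err, input, output, t)
--
-- With compressed labels (layer_conf.compressed = true), the second input
-- carries class indices instead of one-hot rows and dim_in[2] need not match
-- dim_in[1].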