local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
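
-- A fused softmax + cross-entropy (CE) criterion layer. input[1] is the
-- previous layer's output and input[2] the labels; with
-- layer_conf.compressed = true the labels are a single column of class
-- indices that is expanded via decompress() on the fly. Running statistics
-- (total_ce, total_frames, total_correct) are accumulated across
-- propagate() calls.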
function SoftmaxCELayer:__init(id, global_conf, layer_conf)
    nerv.Layer.__init(self, id, global_conf, layer_conf)
    self.compressed = layer_conf.compressed
    if self.compressed == nil then
        self.compressed = false
    end
    self:check_dim_len(2, -1) -- two inputs: nn output and label
end

function SoftmaxCELayer:bind_params()
    -- do nothing
end
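
-- Reset the accumulated statistics and allocate one softmax buffer and one
-- CE buffer per timestep; chunk_size defaults to 1 for non-recurrent
-- networks.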
function SoftmaxCELayer:init(batch_size, chunk_size)
    if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
        nerv.error("mismatching dimensions of previous network output and labels")
    end
    if chunk_size == nil then
        chunk_size = 1
    end
    self.total_ce = 0.0
    self.total_correct = 0
    self.total_frames = 0
    self.softmax = {}
    self.ce = {}
    for t = 1, chunk_size do
        self.softmax[t] = self.mat_type(batch_size, self.dim_in[1])
        self.ce[t] = self.softmax[t]:create()
    end
end
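
-- Reallocate the per-timestep buffers whenever the batch size changes.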
function SoftmaxCELayer:batch_resize(batch_size, chunk_size)
    if chunk_size == nil then
        chunk_size = 1
    end
    for t = 1, chunk_size do
        if self.softmax[t]:nrow() ~= batch_size then
            self.softmax[t] = self.mat_type(batch_size, self.dim_in[1])
            self.ce[t] = self.softmax[t]:create()
        end
    end
end

function SoftmaxCELayer:update(bp_err, input, output, t)
    -- no params, therefore do nothing
end
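
-- Compute softmax over input[1]; the ce buffer then holds per-frame
--     sum_k label[k] * log(softmax[k])
-- (the negated cross-entropy), so total_ce grows by subtracting its sum.
-- Frames masked out by gconf.mask[t] are zeroed first; for compressed
-- labels, the per-row argmax returned by softmax() is compared against the
-- label indices to count correctly classified frames.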
function SoftmaxCELayer:propagate(input, output, t)
    if t == nil then
        t = 1
    end
    local softmax = self.softmax[t]
    local ce = self.ce[t]
    local classified = softmax:softmax(input[1])
    local label = input[2]
    ce:log_elem(softmax)
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    ce:mul_elem(ce, label)
    ce = ce:rowsum()
    ce:set_values_by_mask(self.gconf.mask[t], 0)
    if output[1] ~= nil then
        output[1]:copy_from(ce)
    end
    -- accumulate total CE; the row sums hold log-probabilities of the
    -- correct classes (non-positive), hence the subtraction
    self.total_ce = self.total_ce - ce:colsum()[0][0]
    self.total_frames = self.total_frames + self.gconf.mask[t]:colsum()[0][0]
    -- TODO: add colsame for uncompressed label
    if self.compressed then
        classified:set_values_by_mask(self.gconf.mask[t], -1)
        self.total_correct = self.total_correct + classified:colsame(input[2])[0][0]
    end
end
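
-- For the fused softmax + CE criterion, the error w.r.t. the pre-softmax
-- activations reduces to (softmax output - label); if an upstream error
-- signal arrives in bp_err[1], each row is additionally scaled by it.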
function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
    -- softmax output - label
    if t == nil then
        t = 1
    end
    local label = input[2]
    if self.compressed then
        label = label:decompress(input[1]:ncol())
    end
    local nbe = next_bp_err[1]
    nbe:add(self.softmax[t], label, 1.0, -1.0)
    if bp_err[1] ~= nil then
        nbe:scale_rows_by_col(bp_err[1])
    end
end

function SoftmaxCELayer:get_params()
    return nerv.ParamRepo({}, self.loc_type)
end
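
-- A minimal construction sketch (hypothetical id and dimensions; gconf is
-- assumed to be the usual global config, which must also provide the
-- gconf.mask matrices used above):
--
--     local crit = nerv.SoftmaxCELayer("ce_crit", gconf,
--                                      {dim_in = {429, 1}, dim_out = {1},
--                                       compressed = true})
--
-- With compressed = true, input[2] is one column of class indices; otherwise
-- it must have the same width as input[1] (e.g. one-hot rows), which is what
-- the dimension check in init() enforces.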