require 'libhtkio'
require 'speech_utils'
-- Class object for the reader defined in this file. It is created through
-- nerv.class under the name "nerv.TNetReader"; "nerv.DataReader" is passed as
-- the second argument (presumably the parent class -- confirm against
-- nerv.class).
local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader")
--- Set up the reader from the global and per-reader configuration.
-- Stores the feature id, frame-extension setting and global configuration,
-- builds the feature repository, and builds one label repository per entry
-- of `reader_conf.mlfs` (when that field is present).
-- @param global_conf table kept as `self.gconf`; its `debug` field is read
-- @param reader_conf table providing `id`, `frm_ext`, `scp_file`,
--        `conf_file` and optionally `mlfs` (id -> spec with `file`,
--        `format`, `format_arg`, `dir`, `ext`)
function TNetReader:__init(global_conf, reader_conf)
    local frame_ext = reader_conf.frm_ext

    -- A missing debug flag is stored as an explicit `false`.
    local dbg = global_conf.debug
    if dbg == nil then
        dbg = false
    end

    self.gconf = global_conf
    self.debug = dbg
    self.feat_id = reader_conf.id
    self.frm_ext = frame_ext

    -- The feature repository is constructed before any label repository.
    self.feat_repo = nerv.TNetFeatureRepo(reader_conf.scp_file,
                                          reader_conf.conf_file,
                                          frame_ext)

    local label_repos = {}
    self.lab_repo = label_repos
    -- `mlfs` is optional; with no specs the table of label repos stays empty.
    for label_id, spec in pairs(reader_conf.mlfs or {}) do
        label_repos[label_id] = nerv.TNetLabelRepo(spec.file,
                                                   spec.format,
                                                   spec.format_arg,
                                                   spec.dir,
                                                   spec.ext)
    end
end
--- Build the data for the current utterance and advance to the next one.
-- The current utterance is loaded through `gconf.cumat_type.new_from_host`,
-- optionally expanded by `frm_ext` frames on each side and rearranged, then
-- copied into a `gconf.mmat_type` matrix (optionally trimmed by
-- `gconf.frm_trim` rows at both ends). Every label repository is then queried
-- for the same utterance.
-- @return nil when the feature repository reports its end; otherwise a table
--         holding the feature matrix under `self.feat_id` and one entry per
--         label repository id
function TNetReader:get_data()
    local feat_repo = self.feat_repo
    if feat_repo:is_end() then
        return nil
    end

    local gconf = self.gconf
    local ext = self.frm_ext

    -- read HTK feature
    local frames = gconf.cumat_type.new_from_host(feat_repo:cur_utter(self.debug))

    if ext and ext > 0 then
        -- each output row spans `ext` frames on either side plus the centre
        local width = ext * 2 + 1
        local spliced = gconf.cumat_type(frames:nrow(), frames:ncol() * width)
        spliced:expand_frm(frames, ext)
        frames = spliced
        -- rearrange the feature (``transpose'' operation in TNet)
        if gconf.rearrange then
            frames = spliced:create()
            frames:rearrange_frm(spliced, width)
        end
    end

    local trim = gconf.frm_trim
    local total_rows = frames:nrow()
    local host_feat
    if trim then
        -- NOTE(review): presumably copies rows [trim, total_rows - trim) only;
        -- confirm against copy_toh.
        host_feat = gconf.mmat_type(total_rows - trim * 2, frames:ncol())
        frames:copy_toh(host_feat, trim, total_rows - trim)
    else
        host_feat = gconf.mmat_type(total_rows, frames:ncol())
        frames:copy_toh(host_feat)
    end

    local batch = {}
    batch[self.feat_id] = host_feat

    -- add corresponding labels
    for label_id, label_repo in pairs(self.lab_repo) do
        batch[label_id] = label_repo:get_utter(feat_repo,
                                               host_feat:nrow(),
                                               self.debug)
    end

    -- move the pointer to next
    feat_repo:next()
    -- a full collection cycle is requested after every utterance
    collectgarbage("collect")
    return batch
end