require 'libhtkio'
require 'speech_utils'

--- Data reader registered as "nerv.TNetReader" (derived from "nerv.DataReader").
-- Each call to `get_data` yields one utterance: its feature matrix plus one
-- entry per configured label repository.
local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader")

--- Build the feature repository and, if configured, the label repositories.
-- @param global_conf global configuration table. `debug` is read here; it is
--        also stored as `self.gconf`, from which `get_data` later reads
--        `cumat_type`, `mmat_type`, `rearrange` and `frm_trim`.
-- @param reader_conf reader configuration table. Fields read: `id`, `frm_ext`,
--        `scp_file`, `conf_file`, and the optional `mlfs` table whose entries
--        each provide `file`, `format`, `format_arg`, `dir` and `ext`.
function TNetReader:__init(global_conf, reader_conf)
    self.feat_id = reader_conf.id
    self.frm_ext = reader_conf.frm_ext
    self.gconf = global_conf

    -- Only a missing debug flag falls back to false; any explicit value is kept.
    local debug_flag = global_conf.debug
    if debug_flag == nil then
        debug_flag = false
    end
    self.debug = debug_flag

    self.feat_repo = nerv.TNetFeatureRepo(reader_conf.scp_file,
                                          reader_conf.conf_file,
                                          reader_conf.frm_ext)

    local label_repos = {}
    self.lab_repo = label_repos

    local mlfs = reader_conf.mlfs
    if not mlfs then
        -- no label files configured: the reader serves features only
        return
    end
    for id, spec in pairs(mlfs) do
        label_repos[id] = nerv.TNetLabelRepo(spec.file,
                                             spec.format,
                                             spec.format_arg,
                                             spec.dir,
                                             spec.ext)
    end
end

--- Fetch the current utterance and advance the feature repository.
-- @return nil once the feature repository reports its end; otherwise a table
--         holding the feature matrix under `self.feat_id` and, for every
--         label repository, its result under that repository's id.
function TNetReader:get_data()
    local feat_repo = self.feat_repo
    if feat_repo:is_end() then
        return nil
    end

    local gconf = self.gconf
    local batch = {}

    -- read HTK feature
    local raw = gconf.cumat_type.new_from_host(feat_repo:cur_utter(self.debug))

    -- Without a positive frame extension the raw matrix is used unchanged.
    local frames = raw
    local frm_ext = self.frm_ext
    if frm_ext and frm_ext > 0 then
        local step = frm_ext * 2 + 1
        -- expand the feature: the destination has `step` times as many columns
        local expanded = gconf.cumat_type(raw:nrow(), raw:ncol() * step)
        expanded:expand_frm(raw, frm_ext)
        frames = expanded
        if gconf.rearrange then
            -- rearrange the feature (``transpose'' operation in TNet)
            frames = expanded:create()
            frames:rearrange_frm(expanded, step)
        end
    end

    -- Copy the result into a matrix of `mmat_type`, optionally dropping
    -- `frm_trim` rows from each end.
    local trim = gconf.frm_trim
    local feat_utter
    if not trim then
        feat_utter = gconf.mmat_type(frames:nrow(), frames:ncol())
        frames:copy_toh(feat_utter)
    else
        feat_utter = gconf.mmat_type(frames:nrow() - trim * 2, frames:ncol())
        -- NOTE(review): presumably copies rows [trim, nrow - trim) -- confirm
        -- against the copy_toh implementation.
        frames:copy_toh(feat_utter, trim, frames:nrow() - trim)
    end
    batch[self.feat_id] = feat_utter

    -- add corresponding labels, requested for the final number of feature rows
    for id, repo in pairs(self.lab_repo) do
        batch[id] = repo:get_utter(feat_repo, feat_utter:nrow(), self.debug)
    end

    -- move the pointer to next
    feat_repo:next()
    -- full GC cycle after every utterance, so the temporaries created above
    -- become collectable before the next call
    collectgarbage("collect")
    return batch
end