require 'libkaldiio'
require 'speech_utils'

local KaldiReader = nerv.class("nerv.KaldiReader", "nerv.DataReader")

--- Set up the feature repository and any label / lookup repositories.
-- @param global_conf global configuration table; the fields read by this
--   class are use_cpu, mmat_type, cumat_type, debug, rearrange and frm_trim
-- @param reader_conf reader configuration table; the fields read here are
--   id, frm_ext, need_key, feature_rspecifier and the optional tables
--   mlfs (id -> {targets_rspecifier, format}) and
--   lookup (id -> {targets_rspecifier, map_rspecifier})
function KaldiReader:__init(global_conf, reader_conf)
    self.feat_id = reader_conf.id
    self.frm_ext = reader_conf.frm_ext
    self.need_key = reader_conf.need_key -- for sequence training
    self.gconf = global_conf
    -- working matrix type follows the CPU/GPU switch in the global config
    if self.gconf.use_cpu then
        self.mat_type = self.gconf.mmat_type
    else
        self.mat_type = self.gconf.cumat_type
    end
    self.debug = global_conf.debug
    if self.debug == nil then
        self.debug = false
    end
    self.feat_repo = nerv.KaldiFeatureRepo(reader_conf.feature_rspecifier)

    -- one label repository per entry of reader_conf.mlfs
    self.lab_repo = {}
    if reader_conf.mlfs then
        for id, mlf_spec in pairs(reader_conf.mlfs) do
            if mlf_spec.format == nil then
                nerv.error("format spec is expected for label %s", id)
            end
            self.lab_repo[id] = nerv.KaldiLabelRepo(mlf_spec.targets_rspecifier,
                                                    mlf_spec.format)
        end
    end

    -- one lookup-feature repository per entry of reader_conf.lookup
    self.lookup_repo = {}
    if reader_conf.lookup then
        for id, lookup_spec in pairs(reader_conf.lookup) do
            if lookup_spec.map_rspecifier == nil then
                nerv.error("map spec is expected for lookup %s", id)
            end
            self.lookup_repo[id] = nerv.KaldiLookupFeatureRepo(
                lookup_spec.targets_rspecifier, lookup_spec.map_rspecifier)
        end
    end
end

--- Read the current utterance and advance the feature repository.
-- Utterances for which any label repository returns nil are skipped (with an
-- info message) and the next utterance is tried instead.
-- @return a table holding the feature matrix under self.feat_id, the
--   utterance key under "key" when need_key is set, and one entry per
--   label / lookup id; or nil once the feature repository is exhausted
function KaldiReader:get_data()
    local fail_to_read_alignment = false
    if self.feat_repo:is_end() then
        return nil
    end
    local res = {}

    -- read Kaldi feature
    local raw = self.feat_repo:cur_utter(self.debug)
    if not self.gconf.use_cpu then
        raw = self.gconf.cumat_type.new_from_host(raw)
    end

    local rearranged
    if self.frm_ext and self.frm_ext > 0 then
        local step = self.frm_ext * 2 + 1
        -- expand the feature
        local expanded = self.mat_type(raw:nrow(), raw:ncol() * step)
        expanded:expand_frm(raw, self.frm_ext)
        -- rearrange the feature (``transpose'' operation in TNet)
        if self.gconf.rearrange then
            rearranged = expanded:create()
            rearranged:rearrange_frm(expanded, step)
        else
            rearranged = expanded
        end
    else
        rearranged = raw
    end

    local feat_utter
    if self.gconf.frm_trim then
        -- drop frm_trim rows from both ends while copying into an mmat_type
        feat_utter = self.gconf.mmat_type(
            rearranged:nrow() - self.gconf.frm_trim * 2, rearranged:ncol())
        rearranged:copy_toh(feat_utter, self.gconf.frm_trim,
                            rearranged:nrow() - self.gconf.frm_trim)
    elseif self.gconf.use_cpu then
        feat_utter = rearranged
    else
        feat_utter = self.gconf.mmat_type(rearranged:nrow(), rearranged:ncol())
        rearranged:copy_toh(feat_utter)
    end
    res[self.feat_id] = feat_utter

    if self.need_key then
        res["key"] = self.feat_repo:key()
    end

    -- add corresponding labels
    for id, repo in pairs(self.lab_repo) do
        local lab_utter = repo:get_utter(self.feat_repo, feat_utter:nrow(),
                                         self.debug)
        if lab_utter == nil then
            fail_to_read_alignment = true
        end
        res[id] = lab_utter
    end

    -- add corresponding lookup features
    for id, repo in pairs(self.lookup_repo) do
        local lookup_utter = repo:get_utter(self.feat_repo, feat_utter:nrow(),
                                            self.debug)
        local nrow = lookup_utter:nrow()
        if nrow < feat_utter:nrow() then
            -- repeat the last frame
            local nlu = lookup_utter:create(feat_utter:nrow())
            nlu:copy_from(lookup_utter, 0, nrow)
            for i = 0, feat_utter:nrow() - nrow - 1 do
                nlu:copy_from(nlu, nrow - 1, nrow, nrow + i)
            end
            lookup_utter = nlu
        end
        res[id] = lookup_utter
    end

    -- The key must be fetched before advancing: afterwards the repository
    -- refers to the following utterance (or is exhausted).
    local skipped_key
    if fail_to_read_alignment then
        skipped_key = self.feat_repo:key()
    end

    -- move the pointer to next
    self.feat_repo:next()
    collectgarbage("collect")

    if fail_to_read_alignment then
        nerv.info("[kaldi] utterance %s alignment not found, skip it.",
                  skipped_key)
        -- proper tail call: long runs of skipped utterances do not grow the stack
        return self:get_data()
    end
    return res
end