summaryrefslogblamecommitdiff
path: root/kaldi_io/init.lua
blob: 751247a2d69e65fbcc5dc021413e15a5fd44c9fd (plain) (tree)
1
2
3
4
5
6
7
8
9






                                                                     
                                                                 
                            




                                             






                                                                          






                                                                                
           
       
                         






                                                                                              
           
       


                               
                                        




                                   



                                                      



                                             
                                                                     















                                                                                                         





                                                                                   
       
                                  


                                         


                                                        
                                                          
                                                   
                                

                                         

                           


                                                           
                                                          
                                                   
                                        
                                        
                                    
                                                              

                                                
                                                      



                                                            

                              


                               
                                  
                                                                                             

                             

              
-- Load the Kaldi I/O library and the speech utility module.
-- NOTE(review): presumably these register the nerv.Kaldi*Repo classes used
-- by the reader below -- confirm against the libkaldiio sources.
require 'libkaldiio'
require 'speech_utils'
-- Declare the reader class "nerv.KaldiReader"; the second argument is
-- presumably the name of its parent class -- confirm against nerv.class.
local KaldiReader = nerv.class("nerv.KaldiReader", "nerv.DataReader")

--- Set up a reader from the global and the per-reader configuration.
--
-- Fields read from `reader_conf`: `id`, `frm_ext`, `need_key`,
-- `feature_rspecifier`, and the optional tables `mlfs` and `lookup`.
-- Fields read from `global_conf`: `use_cpu`, `mmat_type`, `cumat_type`
-- and `debug`.
--
-- @param global_conf global configuration table (kept as `self.gconf`)
-- @param reader_conf configuration table of this particular reader
function KaldiReader:__init(global_conf, reader_conf)
    self.feat_id = reader_conf.id
    self.frm_ext = reader_conf.frm_ext
    -- the utterance key is requested for sequence training
    self.need_key = reader_conf.need_key
    self.gconf = global_conf

    -- working matrix type: mmat_type when use_cpu is set, cumat_type otherwise
    local working_type
    if global_conf.use_cpu then
        working_type = global_conf.mmat_type
    else
        working_type = global_conf.cumat_type
    end
    self.mat_type = working_type

    -- an absent debug flag is stored as an explicit false
    local debug_flag = global_conf.debug
    if debug_flag == nil then
        debug_flag = false
    end
    self.debug = debug_flag

    self.feat_repo = nerv.KaldiFeatureRepo(reader_conf.feature_rspecifier)

    -- one label repository per entry of reader_conf.mlfs (if any)
    local label_repos = {}
    self.lab_repo = label_repos
    local label_specs = reader_conf.mlfs
    if label_specs then
        for label_id, spec in pairs(label_specs) do
            if spec.format == nil then
                nerv.error("format spec is expected for label %s", label_id)
            end
            label_repos[label_id] =
                nerv.KaldiLabelRepo(spec.targets_rspecifier, spec.format)
        end
    end

    -- one lookup-feature repository per entry of reader_conf.lookup (if any)
    local lookup_repos = {}
    self.lookup_repo = lookup_repos
    local lookup_specs = reader_conf.lookup
    if lookup_specs then
        for lookup_id, spec in pairs(lookup_specs) do
            if spec.map_rspecifier == nil then
                nerv.error("map spec is expected for lookup %s", lookup_id)
            end
            lookup_repos[lookup_id] =
                nerv.KaldiLookupFeatureRepo(spec.targets_rspecifier,
                                            spec.map_rspecifier)
        end
    end
end

--- Fetch the data of the current utterance and advance to the next one.
--
-- The returned table contains:
--   * `res[self.feat_id]`: the feature matrix, after optional frame
--     expansion (`frm_ext`), rearrangement (`gconf.rearrange`) and trimming
--     (`gconf.frm_trim`); it is copied into a `gconf.mmat_type` matrix
--     unless running on CPU without trimming;
--   * `res["key"]`: the utterance key, only when `need_key` is set;
--   * one entry per configured label repository and lookup repository.
--
-- An utterance for which any label repository returns nil is skipped (an
-- info message naming that utterance is logged) and the following
-- utterance is returned instead.
--
-- @return the table described above, or nil once the feature repository
--         is exhausted
function KaldiReader:get_data()
    if self.feat_repo:is_end() then
        return nil
    end
    local fail_to_read_alignment = false
    local res = {}
    -- read Kaldi feature
    local raw = self.feat_repo:cur_utter(self.debug)
    if not self.gconf.use_cpu then
        raw = self.gconf.cumat_type.new_from_host(raw)
    end
    local rearranged
    if self.frm_ext and self.frm_ext > 0 then
        -- every frame is widened to (2 * frm_ext + 1) times its column count
        local step = self.frm_ext * 2 + 1
        -- expand the feature
        local expanded = self.mat_type(raw:nrow(), raw:ncol() * step)
        expanded:expand_frm(raw, self.frm_ext)
        -- rearrange the feature (``transpose'' operation in TNet)
        if self.gconf.rearrange then
            rearranged = expanded:create()
            rearranged:rearrange_frm(expanded, step)
        else
            rearranged = expanded
        end
    else
        rearranged = raw
    end
    local feat_utter
    if self.gconf.frm_trim then
        -- the result has frm_trim fewer rows at each end of the utterance
        feat_utter = self.gconf.mmat_type(rearranged:nrow() - self.gconf.frm_trim * 2, rearranged:ncol())
        rearranged:copy_toh(feat_utter, self.gconf.frm_trim, rearranged:nrow() - self.gconf.frm_trim)
    else
        if self.gconf.use_cpu then
            feat_utter = rearranged
        else
            feat_utter = self.gconf.mmat_type(rearranged:nrow(), rearranged:ncol())
            rearranged:copy_toh(feat_utter)
        end
    end
    res[self.feat_id] = feat_utter
    if self.need_key then
        res["key"] = self.feat_repo:key()
    end
    -- add corresponding labels
    for id, repo in pairs(self.lab_repo) do
        local lab_utter = repo:get_utter(self.feat_repo,
                                        feat_utter:nrow(),
                                        self.debug)
        if lab_utter == nil then
            fail_to_read_alignment = true
        end
        res[id] = lab_utter
    end
    -- add corresponding lookup features
    for id, repo in pairs(self.lookup_repo) do
        local lookup_utter = repo:get_utter(self.feat_repo,
                                        feat_utter:nrow(),
                                        self.debug)
        local nrow = lookup_utter:nrow()
        if nrow < feat_utter:nrow() then
            -- repeat the last frame until the lookup feature has as many
            -- rows as the acoustic feature
            local nlu = lookup_utter:create(feat_utter:nrow())
            nlu:copy_from(lookup_utter, 0, nrow)
            for i = 0, feat_utter:nrow() - nrow - 1 do
                nlu:copy_from(nlu, nrow - 1, nrow, nrow + i)
            end
            lookup_utter = nlu
        end
        res[id] = lookup_utter
    end
    -- The key must be captured before the repository advances; afterwards
    -- key() would describe the next utterance, not the one being skipped.
    local skipped_key
    if fail_to_read_alignment then
        skipped_key = self.feat_repo:key()
    end
    -- move the pointer to next
    self.feat_repo:next()
    collectgarbage("collect")
    if fail_to_read_alignment then
        nerv.info("[kaldi] utterance %s alignment not found, skip it.", skipped_key)
        -- proper tail call: a long run of skipped utterances does not grow
        -- the stack
        return self:get_data()
    end
    return res
end