summaryrefslogblamecommitdiff
path: root/htk_io/src/KaldiLib/Labels.cc
blob: 8b04cdec81af413fa03939d48c33474fd5ab3208 (plain) (tree)





















































































































































































































































































































































































                                                                                                                               



                                                         









































































































































































































































                                                                                                                               
#include "Labels.h"
#include "Timer.h"
#include "Error.h"
#include <cstdio>
#include <sstream>


namespace TNet {


    ////////////////////////////////////////////////////////////////////////
    // Class LabelRepository::
    void
        LabelRepository::
        Init(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt) {
            InitMap(pLabelMlfFile, pOutputLabelMapFile, pLabelDir, pLabelExt);
        }
    void
        LabelRepository::
        InitExt(const char* pLabelMlfFile, const char* fmt, const char *arg, const char* pLabelDir, const char* pLabelExt) {
            if (strcmp(fmt, "map") == 0)
            {
                InitMap(pLabelMlfFile, arg, pLabelDir, pLabelExt);
                mlf_fmt = MAP;
            }
            else if (strcmp(fmt, "raw") == 0)
            {
                InitRaw(pLabelMlfFile, arg, pLabelDir, pLabelExt);
                mlf_fmt = RAW;
            }
        }

    void
        LabelRepository::
        InitMap(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt)
        {
            assert(NULL != pLabelMlfFile);
            assert(NULL != pOutputLabelMapFile);

            // initialize the label streams
            delete mpLabelStream; //if NULL, does nothing
            delete _mpLabelStream;
            _mpLabelStream = new std::ifstream(pLabelMlfFile);
            mpLabelStream  = new IMlfStream(*_mpLabelStream);

            // Label stream is initialized, just test it
            if(!mpLabelStream->good()) 
                Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);

            // Index the labels (good for randomized file lists)
            Timer tim; tim.Start();
            mpLabelStream->Index();
            tim.End(); mIndexTime += tim.Val(); 

            // Read the state-label to state-id map
            ReadOutputLabelMap(pOutputLabelMapFile);

            // Store the label dir/ext
            mpLabelDir = pLabelDir;
            mpLabelExt = pLabelExt;
        }

    void
        LabelRepository::
        InitRaw(const char* pLabelMlfFile, const char *arg, const char* pLabelDir, const char* pLabelExt)
        {
            assert(NULL != pLabelMlfFile);
            std::istringstream iss(arg); 
            size_t dim;
            iss >> dim;
            if (iss.fail() || dim <= 0)
                PError("[lab] malformed dimension specification");
            raw_dim = dim;
            // initialize the label streams
            delete mpLabelStream; //if NULL, does nothing
            delete _mpLabelStream;
            _mpLabelStream = new std::ifstream(pLabelMlfFile);
            mpLabelStream  = new IMlfStream(*_mpLabelStream);

            // Label stream is initialized, just test it
            if(!mpLabelStream->good()) 
                Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);

            // Index the labels (good for randomized file lists)
            Timer tim; tim.Start();
            mpLabelStream->Index();
            tim.End(); mIndexTime += tim.Val(); 

            // Read the state-label to state-id map
            //ReadOutputLabelMap(pOutputLabelMapFile);

            // Store the label dir/ext
            mpLabelDir = pLabelDir;
            mpLabelExt = pLabelExt;
        }


    void 
        LabelRepository::
        GenDesiredMatrix(BfMatrix& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical, bool has_vad)
        {
            //timer
            Timer tim; tim.Start();

            //Get the MLF stream reference...
            IMlfStream& mLabelStream = *mpLabelStream;
            //Build the file name of the label
            MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);

            //Find block in MLF file
            mLabelStream.Open(mpLabelFile);
            if(!mLabelStream.good()) {
                Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
            }


            //resize the matrix
            if(nFrames < 1) {
                KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
                    << pFeatureLogical;
            }
            int label_map_size = mLabelMap.size();
            rDesired.Init(nFrames, label_map_size + (has_vad ? 2 : 0), true); //true: Zero()

            //aux variables
            std::string line, state;
            unsigned long long beg, end;
            size_t state_index;
            size_t trunc_frames = 0;
            TagToIdMap::iterator it;
            int vad_state;

            //parse the label file
            while(!mLabelStream.eof()) {
                std::getline(mLabelStream, line);
                if(line == "") continue; //skip newlines/comments from MLF
                if(line[0] == '#') continue;

                std::istringstream& iss = mGenDesiredMatrixStream;
                iss.clear();
                iss.str(line);

                //parse the line
                //begin
                iss >> std::ws >> beg;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 1 (begin)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }
                //end
                iss >> std::ws >> end;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 2 (end)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }
                //state tag
                iss >> std::ws >> state;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }

                if (has_vad) /* an additional column for vad */
                {
                    iss >> std::ws >> vad_state;
                    if(iss.fail()) { 
                        KALDI_ERR << "Cannot parse column 4 (vad_state)\n"
                            << "line: " << line << "\n"
                            << "file: " << mpLabelFile << "\n";
                    }
                }

                //fprintf(stderr, "Parsed: %lld %lld %s\n", beg, end, state.c_str());

                //divide beg/end by sourceRate and round up to get interval of frames
                beg = (beg+sourceRate/2)/sourceRate;
                end = (end+sourceRate/2)/sourceRate; 
                //beg = (int)round(beg / (double)sourceRate);
                //end = (int)round(end / (double)sourceRate); 

                //find the state id
                it = mLabelMap.find(state);
                if(mLabelMap.end() == it) {
                    Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile);
                }
                state_index = it->second;

                // Fill the desired matrix
                for(unsigned long long frame=beg; frame<end; frame++) { 
                    //don't write after matrix... (possible longer transcript than feature file)
                    if(frame >= (int)rDesired.Rows()) { trunc_frames++; continue; }

                    //check the next frame is empty:
                    if(0.0 != rDesired[frame].Sum()) {
                        //ERROR!!!
                        //find out what was previously filled!!!
                        BaseFloat max = rDesired[frame].Max();
                        int idx = -1;
                        for(int i=0; i<(int)rDesired[frame].Dim(); i++) { 
                            if(rDesired[frame][i] == max) idx = i; 
                        }
                        for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) {
                            if((int)it->second == idx) break;
                        }
                        std::string state_prev = "error";
                        if(it != mLabelMap.end()) {
                            state_prev = it->first;
                        }
                        //print the error message
                        std::ostringstream os; 
                        os << "Frame already assigned to other state, "
                            << " file: " << mpLabelFile 
                            << " frame: " << frame
                            << " nframes: " << nFrames 
                            << " sum: " << rDesired[frame].Sum()  
                            << " previously assigned to: " << state_prev << "(" << idx << ")" 
                            << " now should be assigned to: " << state << "(" << state_index << ")"
                            << "\n";
                        Error(os.str());
                    }

                    //fill the row
                    rDesired[(size_t)frame][state_index] = 1.0f;
                    if (has_vad)
                        rDesired[(size_t)frame][label_map_size + !!vad_state] = 1.0f;
                }
            }

            mLabelStream.Close();

            //check the desired matrix (rows sum up to 1.0)
            for(size_t i=0; i<rDesired.Rows(); ++i) {
                float desired_row_sum = rDesired[i].Sum();
                if(!desired_row_sum == 1.0) {
                    std::ostringstream os;
                    os << "Desired vector sum isn't 1.0, "
                        << " file: " << mpLabelFile 
                        << " row: " << i 
                        << " nframes: " << nFrames 
                        << " content: " << rDesired[i] 
                        << " sum: " << desired_row_sum  << "\n";
                    Error(os.str());
                }
            }

            //warning when truncating many frames
            if(trunc_frames > 10) {
                std::ostringstream os;
                os << "Truncated frames: " << trunc_frames 
                    << " Check sourcerate in features and validity of labels\n";
                Warning(os.str());
            }

            //timer
            tim.End(); mGenDesiredMatrixTime += tim.Val();
        }



    void
        LabelRepository::
        ReadOutputLabelMap(const char* file)
        {
            assert(mLabelMap.size() == 0);
            int i = 0;
            std::string state_tag;
            std::ifstream in(file);
            if(!in.good())
                Error(std::string("Cannot open OutputLabelMapFile: ")+file);

            in >> std::ws;
            while(!in.eof()) {
                in >> state_tag;
                in >> std::ws;
                assert(mLabelMap.find(state_tag) == mLabelMap.end());
                mLabelMap[state_tag] = i++;
            }

            in.close();
            assert(mLabelMap.size() > 0);
        }


    void 
        LabelRepository::
        GenDesiredMatrixExt(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) {
            switch (mlf_fmt)
            {
                case MAP: GenDesiredMatrixExtMap(rDesired, nFrames, sourceRate, pFeatureLogical);
                            break;
                case RAW: GenDesiredMatrixExtRaw(rDesired, nFrames, sourceRate, pFeatureLogical);
                          break;
                default: assert(0);
            }
        }

    void 
        LabelRepository::
        GenDesiredMatrixExtMap(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
        {
            //timer
            Timer tim; tim.Start();

            //Get the MLF stream reference...
            IMlfStream& mLabelStream = *mpLabelStream;
            //Build the file name of the label
            MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);

            //Find block in MLF file
            mLabelStream.Open(mpLabelFile);
            if(!mLabelStream.good()) {
                Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
            }


            //resize the matrix
            if(nFrames < 1) {
                KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
                    << pFeatureLogical;
            }

            size_t prev = rDesired.size();
            rDesired.resize(prev + 1, BfMatrix()); /* state + vad */
            int label_map_size = mLabelMap.size();
            rDesired[prev].Init(nFrames, 1, true); //true: Zero()

            //aux variables
            std::string line, state;
            unsigned long long beg, end;
            size_t state_index;
            size_t trunc_frames = 0;
            TagToIdMap::iterator it;

            //parse the label file
            while(!mLabelStream.eof()) {
                std::getline(mLabelStream, line);
                if(line == "") continue; //skip newlines/comments from MLF
                if(line[0] == '#') continue;

                std::istringstream& iss = mGenDesiredMatrixStream;
                iss.clear();
                iss.str(line);

                //parse the line
                //begin
                iss >> std::ws >> beg;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 1 (begin)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }
                //end
                iss >> std::ws >> end;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 2 (end)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }
                //state tag
                iss >> std::ws >> state;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }


                //fprintf(stderr, "Parsed: %lld %lld %s\n", beg, end, state.c_str());

                //divide beg/end by sourceRate and round up to get interval of frames
                beg = (beg+sourceRate/2)/sourceRate;
                if (end == (unsigned long long)-1)
                    end = rDesired[prev].Rows();
                else
                    end = (end+sourceRate/2)/sourceRate; 
                //beg = (int)round(beg / (double)sourceRate);
                //end = (int)round(end / (double)sourceRate); 

                //find the state id
                it = mLabelMap.find(state);
                if(mLabelMap.end() == it) {
                    Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile);
                }
                state_index = it->second;

                // Fill the desired matrix
                for(unsigned long long frame=beg; frame<end; frame++) { 
                    //don't write after matrix... (possible longer transcript than feature file)
                    if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; }

                    //check the next frame is empty:
                    if(0.0 != rDesired[prev][frame][0]) {
                        //ERROR!!!
                        //find out what was previously filled!!!
                        /*
                        BaseFloat max = rDesired[prev][frame].Max();
                        int idx = -1;
                        for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) { 
                            if(rDesired[prev][frame][i] == max) idx = i; 
                        }
                        */
                        BaseFloat max = rDesired[prev][frame][0];
                        int idx = round(max);
                        for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) {
                            if((int)it->second == idx) break;
                        }
                        std::string state_prev = "error";
                        if(it != mLabelMap.end()) {
                            state_prev = it->first;
                        }
                        //print the error message
                        std::ostringstream os; 
                        os << "Frame already assigned to other state, "
                            << " file: " << mpLabelFile 
                            << " frame: " << frame
                            << " nframes: " << nFrames 
                            << " sum: " << max
                            << " previously assigned to: " << state_prev << "(" << idx << ")" 
                            << " now should be assigned to: " << state << "(" << state_index << ")"
                            << "\n";
                        Error(os.str());
                    }

                    //fill the row
                    //rDesired[prev][(size_t)frame][state_index] = 1.0f;
                    rDesired[prev][(size_t)frame][0] = state_index;
                }
            }

            mLabelStream.Close();
            /*
            //check the desired matrix (rows sum up to 1.0)
            for(size_t i=0; i<rDesired[prev].Rows(); ++i) {
                float desired_row_sum = rDesired[prev][i].Sum();
                if(!desired_row_sum == 1.0) {
                    std::ostringstream os;
                    os << "Desired vector sum isn't 1.0, "
                        << " file: " << mpLabelFile 
                        << " row: " << i 
                        << " nframes: " << nFrames 
                        << " content: " << rDesired[prev][i] 
                        << " sum: " << desired_row_sum  << "\n";
                    Error(os.str());
                }
            }
            */

            //warning when truncating many frames
            if(trunc_frames > 10) {
                std::ostringstream os;
                os << "Truncated frames: " << trunc_frames 
                    << " Check sourcerate in features and validity of labels\n";
                Warning(os.str());
            }

            //timer
            tim.End(); mGenDesiredMatrixTime += tim.Val();
        }

    void 
        LabelRepository::
        GenDesiredMatrixExtRaw(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
        {
            //timer
            Timer tim; tim.Start();

            //Get the MLF stream reference...
            IMlfStream& mLabelStream = *mpLabelStream;
            //Build the file name of the label
            MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);

            //Find block in MLF file
            mLabelStream.Open(mpLabelFile);
            if(!mLabelStream.good()) {
                Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
            }


            //resize the matrix
            if(nFrames < 1) {
                KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
                    << pFeatureLogical;
            }

            size_t prev = rDesired.size();
            rDesired.resize(prev + 1, BfMatrix()); /* state + vad */
            rDesired[prev].Init(nFrames, raw_dim, true); //true: Zero()

            //aux variables
            std::string line, state;
            unsigned long long beg, end;
            size_t trunc_frames = 0;
            Vector<BaseFloat> raw;
            raw.Init(raw_dim);

            //parse the label file
            while(!mLabelStream.eof()) {
                std::getline(mLabelStream, line);
                if(line == "") continue; //skip newlines/comments from MLF
                if(line[0] == '#') continue;

                std::istringstream& iss = mGenDesiredMatrixStream;
                iss.clear();
                iss.str(line);

                //parse the line
                //begin
                iss >> std::ws >> beg;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 1 (begin)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }
                //end
                iss >> std::ws >> end;
                if(iss.fail()) { 
                    KALDI_ERR << "Cannot parse column 2 (end)\n"
                        << "line: " << line << "\n"
                        << "file: " << mpLabelFile << "\n";
                }

                for (size_t i = 0; i < raw_dim; i++)
                {
                    if (iss.eof())
                        PError("[label] insufficient columns for the label: %s", mpLabelFile);
                    iss >> raw[i];
                    if (iss.fail())
                        PError("[label] cannot parse raw value for the label: %s", mpLabelFile);
                }
                /*
                   for (size_t i = 0; i < raw_dim; i++)
                   fprintf(stderr, "%.3f", raw[i]);
                   fprintf(stderr, "\n");
                   */
                //divide beg/end by sourceRate and round up to get interval of frames
                beg = (beg+sourceRate/2)/sourceRate;
                if (end == (unsigned long long)-1)
                    end = rDesired[prev].Rows();
                else
                    end = (end+sourceRate/2)/sourceRate; 
                //printf("end:%lld\n", end);
                //beg = (int)round(beg / (double)sourceRate);
                //end = (int)round(end / (double)sourceRate); 

                // Fill the desired matrix
                for(unsigned long long frame=beg; frame<end; frame++) { 
                    //don't write after matrix... (possible longer transcript than feature file)
                    if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; }

                    //check the next frame is empty:
                    if(0.0 != rDesired[prev][frame][0]) {
                        //ERROR!!!
                        //find out what was previously filled!!!
                        /*
                           BaseFloat max = rDesired[prev][frame].Max();
                           int idx = -1;
                           for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) { 
                           if(rDesired[prev][frame][i] == max) idx = i; 
                           }
                           */
                        BaseFloat max = rDesired[prev][frame][0];
                        //print the error message
                        std::ostringstream os; 
                        os << "Frame already assigned to other state, "
                            << " file: " << mpLabelFile 
                            << " frame: " << frame
                            << " nframes: " << nFrames 
                            << " sum: " << max
                            << "\n";
                        Error(os.str());
                    }

                    //fill the row
                    //rDesired[prev][(size_t)frame][state_index] = 1.0f;
                    rDesired[prev][(size_t)frame].Copy(raw);
                }
            }

            mLabelStream.Close();
            /*
            //check the desired matrix (rows sum up to 1.0)
            for(size_t i=0; i<rDesired[prev].Rows(); ++i) {
            float desired_row_sum = rDesired[prev][i].Sum();
            if(!desired_row_sum == 1.0) {
            std::ostringstream os;
            os << "Desired vector sum isn't 1.0, "
            << " file: " << mpLabelFile 
            << " row: " << i 
            << " nframes: " << nFrames 
            << " content: " << rDesired[prev][i] 
            << " sum: " << desired_row_sum  << "\n";
            Error(os.str());
            }
            }
            */

            //warning when truncating many frames
            if(trunc_frames > 10) {
                std::ostringstream os;
                os << "Truncated frames: " << trunc_frames 
                    << " Check sourcerate in features and validity of labels\n";
                Warning(os.str());
            }

            //timer
            tim.End(); mGenDesiredMatrixTime += tim.Val();
        }

}//namespace