diff options
Diffstat (limited to 'tnet_io/KaldiLib/Labels.cc')
-rw-r--r-- | tnet_io/KaldiLib/Labels.cc | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/tnet_io/KaldiLib/Labels.cc b/tnet_io/KaldiLib/Labels.cc new file mode 100644 index 0000000..1af4447 --- /dev/null +++ b/tnet_io/KaldiLib/Labels.cc @@ -0,0 +1,609 @@ +#include "Labels.h" +#include "Timer.h" +#include "Error.h" +#include <cstdio> +#include <sstream> + + +namespace TNet { + + + //////////////////////////////////////////////////////////////////////// + // Class LabelRepository:: + void + LabelRepository:: + Init(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt) { + InitMap(pLabelMlfFile, pOutputLabelMapFile, pLabelDir, pLabelExt); + } + void + LabelRepository:: + InitExt(const char* pLabelMlfFile, const char* fmt, const char *arg, const char* pLabelDir, const char* pLabelExt) { + if (strcmp(fmt, "map") == 0) + { + InitMap(pLabelMlfFile, arg, pLabelDir, pLabelExt); + mlf_fmt = MAP; + } + else if (strcmp(fmt, "raw") == 0) + { + InitRaw(pLabelMlfFile, arg, pLabelDir, pLabelExt); + mlf_fmt = RAW; + } + } + + void + LabelRepository:: + InitMap(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt) + { + assert(NULL != pLabelMlfFile); + assert(NULL != pOutputLabelMapFile); + + // initialize the label streams + delete mpLabelStream; //if NULL, does nothing + delete _mpLabelStream; + _mpLabelStream = new std::ifstream(pLabelMlfFile); + mpLabelStream = new IMlfStream(*_mpLabelStream); + + // Label stream is initialized, just test it + if(!mpLabelStream->good()) + Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile); + + // Index the labels (good for randomized file lists) + Timer tim; tim.Start(); + mpLabelStream->Index(); + tim.End(); mIndexTime += tim.Val(); + + // Read the state-label to state-id map + ReadOutputLabelMap(pOutputLabelMapFile); + + // Store the label dir/ext + mpLabelDir = pLabelDir; + mpLabelExt = pLabelExt; + } + + void + LabelRepository:: + InitRaw(const char* pLabelMlfFile, const char *arg, const char* pLabelDir, const char* pLabelExt) + { + assert(NULL != pLabelMlfFile); + std::istringstream iss(arg); + size_t dim; + iss >> dim; + if (iss.fail() || dim <= 0) + PError("[lab] malformed dimension specification"); + raw_dim = dim; + // initialize the label streams + delete mpLabelStream; //if NULL, does nothing + delete _mpLabelStream; + _mpLabelStream = new std::ifstream(pLabelMlfFile); + mpLabelStream = new IMlfStream(*_mpLabelStream); + + // Label stream is initialized, just test it + if(!mpLabelStream->good()) + Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile); + + // Index the labels (good for randomized file lists) + Timer tim; tim.Start(); + mpLabelStream->Index(); + tim.End(); mIndexTime += tim.Val(); + + // Read the state-label to state-id map + //ReadOutputLabelMap(pOutputLabelMapFile); + + // Store the label dir/ext + mpLabelDir = pLabelDir; + mpLabelExt = pLabelExt; + } + + + void + LabelRepository:: + GenDesiredMatrix(BfMatrix& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical, bool has_vad) + { + //timer + Timer tim; tim.Start(); + + //Get the MLF stream reference... + IMlfStream& mLabelStream = *mpLabelStream; + //Build the file name of the label + MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt); + + //Find block in MLF file + mLabelStream.Open(mpLabelFile); + if(!mLabelStream.good()) { + Error(std::string("Cannot open label MLF record: ") + mpLabelFile); + } + + + //resize the matrix + if(nFrames < 1) { + KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n" + << pFeatureLogical; + } + int label_map_size = mLabelMap.size(); + rDesired.Init(nFrames, label_map_size + (has_vad ? 2 : 0), true); //true: Zero() + + //aux variables + std::string line, state; + unsigned long long beg, end; + size_t state_index; + size_t trunc_frames = 0; + TagToIdMap::iterator it; + int vad_state; + + //parse the label file + while(!mLabelStream.eof()) { + std::getline(mLabelStream, line); + if(line == "") continue; //skip newlines/comments from MLF + if(line[0] == '#') continue; + + std::istringstream& iss = mGenDesiredMatrixStream; + iss.clear(); + iss.str(line); + + //parse the line + //begin + iss >> std::ws >> beg; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 1 (begin)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + //end + iss >> std::ws >> end; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 2 (end)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + //state tag + iss >> std::ws >> state; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 3 (state_tag)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + + if (has_vad) /* an additional column for vad */ + { + iss >> std::ws >> vad_state; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 4 (vad_state)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + } + + //fprintf(stderr, "Parsed: %lld %lld %s\n", beg, end, state.c_str()); + + //divide beg/end by sourceRate and round up to get interval of frames + beg = (beg+sourceRate/2)/sourceRate; + end = (end+sourceRate/2)/sourceRate; + //beg = (int)round(beg / (double)sourceRate); + //end = (int)round(end / (double)sourceRate); + + //find the state id + it = mLabelMap.find(state); + if(mLabelMap.end() == it) { + Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile); + } + state_index = it->second; + + // Fill the desired matrix + for(unsigned long long frame=beg; frame<end; frame++) { + //don't write after matrix... (possible longer transcript than feature file) + if(frame >= (int)rDesired.Rows()) { trunc_frames++; continue; } + + //check the next frame is empty: + if(0.0 != rDesired[frame].Sum()) { + //ERROR!!! + //find out what was previously filled!!! + BaseFloat max = rDesired[frame].Max(); + int idx = -1; + for(int i=0; i<(int)rDesired[frame].Dim(); i++) { + if(rDesired[frame][i] == max) idx = i; + } + for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) { + if((int)it->second == idx) break; + } + std::string state_prev = "error"; + if(it != mLabelMap.end()) { + state_prev = it->first; + } + //print the error message + std::ostringstream os; + os << "Frame already assigned to other state, " + << " file: " << mpLabelFile + << " frame: " << frame + << " nframes: " << nFrames + << " sum: " << rDesired[frame].Sum() + << " previously assigned to: " << state_prev << "(" << idx << ")" + << " now should be assigned to: " << state << "(" << state_index << ")" + << "\n"; + Error(os.str()); + } + + //fill the row + rDesired[(size_t)frame][state_index] = 1.0f; + if (has_vad) + rDesired[(size_t)frame][label_map_size + !!vad_state] = 1.0f; + } + } + + mLabelStream.Close(); + + //check the desired matrix (rows sum up to 1.0) + for(size_t i=0; i<rDesired.Rows(); ++i) { + float desired_row_sum = rDesired[i].Sum(); + if(!desired_row_sum == 1.0) { + std::ostringstream os; + os << "Desired vector sum isn't 1.0, " + << " file: " << mpLabelFile + << " row: " << i + << " nframes: " << nFrames + << " content: " << rDesired[i] + << " sum: " << desired_row_sum << "\n"; + Error(os.str()); + } + } + + //warning when truncating many frames + if(trunc_frames > 10) { + std::ostringstream os; + os << "Truncated frames: " << trunc_frames + << " Check sourcerate in features and validity of labels\n"; + Warning(os.str()); + } + + //timer + tim.End(); mGenDesiredMatrixTime += tim.Val(); + } + + + + void + LabelRepository:: + ReadOutputLabelMap(const char* file) + { + assert(mLabelMap.size() == 0); + int i = 0; + std::string state_tag; + std::ifstream in(file); + if(!in.good()) + Error(std::string("Cannot open OutputLabelMapFile: ")+file); + + in >> std::ws; + while(!in.eof()) { + in >> state_tag; + in >> std::ws; + assert(mLabelMap.find(state_tag) == mLabelMap.end()); + mLabelMap[state_tag] = i++; + } + + in.close(); + assert(mLabelMap.size() > 0); + } + + + void + LabelRepository:: + GenDesiredMatrixExt(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) { + switch (mlf_fmt) + { + case MAP: GenDesiredMatrixExtMap(rDesired, nFrames, sourceRate, pFeatureLogical); + break; + case RAW: GenDesiredMatrixExtRaw(rDesired, nFrames, sourceRate, pFeatureLogical); + break; + default: assert(0); + } + } + + void + LabelRepository:: + GenDesiredMatrixExtMap(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) + { + //timer + Timer tim; tim.Start(); + + //Get the MLF stream reference... + IMlfStream& mLabelStream = *mpLabelStream; + //Build the file name of the label + MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt); + + //Find block in MLF file + mLabelStream.Open(mpLabelFile); + if(!mLabelStream.good()) { + Error(std::string("Cannot open label MLF record: ") + mpLabelFile); + } + + + //resize the matrix + if(nFrames < 1) { + KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n" + << pFeatureLogical; + } + + size_t prev = rDesired.size(); + rDesired.resize(prev + 1, BfMatrix()); /* state + vad */ + int label_map_size = mLabelMap.size(); + rDesired[prev].Init(nFrames, 1, true); //true: Zero() + + //aux variables + std::string line, state; + unsigned long long beg, end; + size_t state_index; + size_t trunc_frames = 0; + TagToIdMap::iterator it; + + //parse the label file + while(!mLabelStream.eof()) { + std::getline(mLabelStream, line); + if(line == "") continue; //skip newlines/comments from MLF + if(line[0] == '#') continue; + + std::istringstream& iss = mGenDesiredMatrixStream; + iss.clear(); + iss.str(line); + + //parse the line + //begin + iss >> std::ws >> beg; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 1 (begin)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + //end + iss >> std::ws >> end; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 2 (end)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + //state tag + iss >> std::ws >> state; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 3 (state_tag)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + + + //fprintf(stderr, "Parsed: %lld %lld %s\n", beg, end, state.c_str()); + + //divide beg/end by sourceRate and round up to get interval of frames + beg = (beg+sourceRate/2)/sourceRate; + end = (end+sourceRate/2)/sourceRate; + //beg = (int)round(beg / (double)sourceRate); + //end = (int)round(end / (double)sourceRate); + + //find the state id + it = mLabelMap.find(state); + if(mLabelMap.end() == it) { + Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile); + } + state_index = it->second; + + // Fill the desired matrix + for(unsigned long long frame=beg; frame<end; frame++) { + //don't write after matrix... (possible longer transcript than feature file) + if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; } + + //check the next frame is empty: + if(0.0 != rDesired[prev][frame][0]) { + //ERROR!!! + //find out what was previously filled!!! + /* + BaseFloat max = rDesired[prev][frame].Max(); + int idx = -1; + for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) { + if(rDesired[prev][frame][i] == max) idx = i; + } + */ + BaseFloat max = rDesired[prev][frame][0]; + int idx = round(max); + for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) { + if((int)it->second == idx) break; + } + std::string state_prev = "error"; + if(it != mLabelMap.end()) { + state_prev = it->first; + } + //print the error message + std::ostringstream os; + os << "Frame already assigned to other state, " + << " file: " << mpLabelFile + << " frame: " << frame + << " nframes: " << nFrames + << " sum: " << max + << " previously assigned to: " << state_prev << "(" << idx << ")" + << " now should be assigned to: " << state << "(" << state_index << ")" + << "\n"; + Error(os.str()); + } + + //fill the row + //rDesired[prev][(size_t)frame][state_index] = 1.0f; + rDesired[prev][(size_t)frame][0] = state_index; + } + } + + mLabelStream.Close(); + /* + //check the desired matrix (rows sum up to 1.0) + for(size_t i=0; i<rDesired[prev].Rows(); ++i) { + float desired_row_sum = rDesired[prev][i].Sum(); + if(!desired_row_sum == 1.0) { + std::ostringstream os; + os << "Desired vector sum isn't 1.0, " + << " file: " << mpLabelFile + << " row: " << i + << " nframes: " << nFrames + << " content: " << rDesired[prev][i] + << " sum: " << desired_row_sum << "\n"; + Error(os.str()); + } + } + */ + + //warning when truncating many frames + if(trunc_frames > 10) { + std::ostringstream os; + os << "Truncated frames: " << trunc_frames + << " Check sourcerate in features and validity of labels\n"; + Warning(os.str()); + } + + //timer + tim.End(); mGenDesiredMatrixTime += tim.Val(); + } + + void + LabelRepository:: + GenDesiredMatrixExtRaw(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) + { + //timer + Timer tim; tim.Start(); + + //Get the MLF stream reference... + IMlfStream& mLabelStream = *mpLabelStream; + //Build the file name of the label + MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt); + + //Find block in MLF file + mLabelStream.Open(mpLabelFile); + if(!mLabelStream.good()) { + Error(std::string("Cannot open label MLF record: ") + mpLabelFile); + } + + + //resize the matrix + if(nFrames < 1) { + KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n" + << pFeatureLogical; + } + + size_t prev = rDesired.size(); + rDesired.resize(prev + 1, BfMatrix()); /* state + vad */ + rDesired[prev].Init(nFrames, raw_dim, true); //true: Zero() + + //aux variables + std::string line, state; + unsigned long long beg, end; + size_t trunc_frames = 0; + Vector<BaseFloat> raw; + raw.Init(raw_dim); + + //parse the label file + while(!mLabelStream.eof()) { + std::getline(mLabelStream, line); + if(line == "") continue; //skip newlines/comments from MLF + if(line[0] == '#') continue; + + std::istringstream& iss = mGenDesiredMatrixStream; + iss.clear(); + iss.str(line); + + //parse the line + //begin + iss >> std::ws >> beg; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 1 (begin)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + //end + iss >> std::ws >> end; + if(iss.fail()) { + KALDI_ERR << "Cannot parse column 2 (end)\n" + << "line: " << line << "\n" + << "file: " << mpLabelFile << "\n"; + } + + for (size_t i = 0; i < raw_dim; i++) + { + if (iss.eof()) + PError("[label] insufficient columns for the label: %s", mpLabelFile); + iss >> raw[i]; + if (iss.fail()) + PError("[label] cannot parse raw value for the label: %s", mpLabelFile); + } + /* + for (size_t i = 0; i < raw_dim; i++) + fprintf(stderr, "%.3f", raw[i]); + fprintf(stderr, "\n"); + */ + //divide beg/end by sourceRate and round up to get interval of frames + beg = (beg+sourceRate/2)/sourceRate; + if (end == (unsigned long long)-1) + end = rDesired[prev].Rows(); + else + end = (end+sourceRate/2)/sourceRate; + //printf("end:%lld\n", end); + //beg = (int)round(beg / (double)sourceRate); + //end = (int)round(end / (double)sourceRate); + + // Fill the desired matrix + for(unsigned long long frame=beg; frame<end; frame++) { + //don't write after matrix... (possible longer transcript than feature file) + if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; } + + //check the next frame is empty: + if(0.0 != rDesired[prev][frame][0]) { + //ERROR!!! + //find out what was previously filled!!! + /* + BaseFloat max = rDesired[prev][frame].Max(); + int idx = -1; + for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) { + if(rDesired[prev][frame][i] == max) idx = i; + } + */ + BaseFloat max = rDesired[prev][frame][0]; + //print the error message + std::ostringstream os; + os << "Frame already assigned to other state, " + << " file: " << mpLabelFile + << " frame: " << frame + << " nframes: " << nFrames + << " sum: " << max + << "\n"; + Error(os.str()); + } + + //fill the row + //rDesired[prev][(size_t)frame][state_index] = 1.0f; + rDesired[prev][(size_t)frame].Copy(raw); + } + } + + mLabelStream.Close(); + /* + //check the desired matrix (rows sum up to 1.0) + for(size_t i=0; i<rDesired[prev].Rows(); ++i) { + float desired_row_sum = rDesired[prev][i].Sum(); + if(!desired_row_sum == 1.0) { + std::ostringstream os; + os << "Desired vector sum isn't 1.0, " + << " file: " << mpLabelFile + << " row: " << i + << " nframes: " << nFrames + << " content: " << rDesired[prev][i] + << " sum: " << desired_row_sum << "\n"; + Error(os.str()); + } + } + */ + + //warning when truncating many frames + if(trunc_frames > 10) { + std::ostringstream os; + os << "Truncated frames: " << trunc_frames + << " Check sourcerate in features and validity of labels\n"; + Warning(os.str()); + } + + //timer + tim.End(); mGenDesiredMatrixTime += tim.Val(); + } + +}//namespace |